Diffstat (limited to 'drivers/net/ethernet')
480 files changed, 39328 insertions, 6500 deletions
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 0baac25db4f8..9a542e3c9b05 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -158,6 +158,17 @@ config ETHOC help Say Y here if you want to use the OpenCores 10/100 Mbps Ethernet MAC. +config OA_TC6 + tristate "OPEN Alliance TC6 10BASE-T1x MAC-PHY support" + depends on SPI + select PHYLIB + help + This library implements OPEN Alliance TC6 10BASE-T1x MAC-PHY + Serial Interface protocol for supporting 10BASE-T1x MAC-PHYs. + + To know the implementation details, refer documentation in + <file:Documentation/networking/oa-tc6-framework.rst>. + source "drivers/net/ethernet/packetengines/Kconfig" source "drivers/net/ethernet/pasemi/Kconfig" source "drivers/net/ethernet/pensando/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index c03203439c0e..99fa180dedb8 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -105,3 +105,4 @@ obj-$(CONFIG_NET_VENDOR_XILINX) += xilinx/ obj-$(CONFIG_NET_VENDOR_XIRCOM) += xircom/ obj-$(CONFIG_NET_VENDOR_SYNOPSYS) += synopsys/ obj-$(CONFIG_NET_VENDOR_PENSANDO) += pensando/ +obj-$(CONFIG_OA_TC6) += oa_tc6.o diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 0713f1e2c7f3..3431a7e62b0d 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -1599,7 +1599,7 @@ static int adin1110_probe_netdevs(struct adin1110_priv *priv) netdev->netdev_ops = &adin1110_netdev_ops; netdev->ethtool_ops = &adin1110_ethtool_ops; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->features |= NETIF_F_NETNS_LOCAL; + netdev->netns_local = true; port_priv->phydev = get_phy_device(priv->mii_bus, i + 1, false); if (IS_ERR(port_priv->phydev)) { diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c index 78231c85234d..f62851708d4f 100644 --- a/drivers/net/ethernet/alacritech/slicoss.c +++ b/drivers/net/ethernet/alacritech/slicoss.c @@ -1678,17 +1678,15 @@ static int slic_init(struct slic_device *sdev) slic_card_reset(sdev); err = slic_load_firmware(sdev); - if (err) { - dev_err(&sdev->pdev->dev, "failed to load firmware\n"); - return err; - } + if (err) + return dev_err_probe(&sdev->pdev->dev, err, + "failed to load firmware\n"); /* we need the shared memory to read EEPROM so set it up temporarily */ err = slic_init_shmem(sdev); - if (err) { - dev_err(&sdev->pdev->dev, "failed to init shared memory\n"); - return err; - } + if (err) + return dev_err_probe(&sdev->pdev->dev, err, + "failed to init shared memory\n"); err = slic_read_eeprom(sdev); if (err) { @@ -1741,10 +1739,9 @@ static int slic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) int err; err = pci_enable_device(pdev); - if (err) { - dev_err(&pdev->dev, "failed to enable PCI device\n"); - return err; - } + if (err) + return dev_err_probe(&pdev->dev, err, + "failed to enable PCI device\n"); pci_set_master(pdev); pci_try_set_mwi(pdev); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 3d8ac63132fb..9e6f91df2ba0 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -1560,9 +1560,9 @@ static void ace_watchdog(struct net_device *data, unsigned int txqueue) } -static void ace_tasklet(struct tasklet_struct *t) +static void ace_bh_work(struct work_struct *work) { - struct ace_private *ap = from_tasklet(ap, t, ace_tasklet); + struct ace_private *ap = 
from_work(ap, work, ace_bh_work); struct net_device *dev = ap->ndev; int cur_size; @@ -1595,7 +1595,7 @@ static void ace_tasklet(struct tasklet_struct *t) #endif ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size); } - ap->tasklet_pending = 0; + ap->bh_work_pending = 0; } @@ -1617,7 +1617,7 @@ static void ace_dump_trace(struct ace_private *ap) * * Loading rings is safe without holding the spin lock since this is * done only before the device is enabled, thus no interrupts are - * generated and by the interrupt handler/tasklet handler. + * generated and by the interrupt handler/bh handler. */ static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs) { @@ -2160,7 +2160,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) */ if (netif_running(dev)) { int cur_size; - int run_tasklet = 0; + int run_bh_work = 0; cur_size = atomic_read(&ap->cur_rx_bufs); if (cur_size < RX_LOW_STD_THRES) { @@ -2172,7 +2172,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size); } else - run_tasklet = 1; + run_bh_work = 1; } if (!ACE_IS_TIGON_I(ap)) { @@ -2188,7 +2188,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size); } else - run_tasklet = 1; + run_bh_work = 1; } } @@ -2205,12 +2205,12 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size); } else - run_tasklet = 1; + run_bh_work = 1; } } - if (run_tasklet && !ap->tasklet_pending) { - ap->tasklet_pending = 1; - tasklet_schedule(&ap->ace_tasklet); + if (run_bh_work && !ap->bh_work_pending) { + ap->bh_work_pending = 1; + queue_work(system_bh_wq, &ap->ace_bh_work); } } @@ -2267,7 +2267,7 @@ static int ace_open(struct net_device *dev) /* * Setup the bottom half rx ring refill handler */ - tasklet_setup(&ap->ace_tasklet, ace_tasklet); + INIT_WORK(&ap->ace_bh_work, ace_bh_work); return 0; } @@ -2301,7 +2301,7 @@ static int ace_close(struct net_device *dev) cmd.idx = 0; ace_issue_cmd(regs, &cmd); - tasklet_kill(&ap->ace_tasklet); + cancel_work_sync(&ap->ace_bh_work); /* * Make sure one CPU is not processing packets while diff --git a/drivers/net/ethernet/alteon/acenic.h b/drivers/net/ethernet/alteon/acenic.h index ca5ce0cbbad1..0e45a97b9c9b 100644 --- a/drivers/net/ethernet/alteon/acenic.h +++ b/drivers/net/ethernet/alteon/acenic.h @@ -2,7 +2,7 @@ #ifndef _ACENIC_H_ #define _ACENIC_H_ #include <linux/interrupt.h> - +#include <linux/workqueue.h> /* * Generate TX index update each time, when TX ring is closed. 
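The acenic hunks above are part of a tree-wide migration from tasklets to BH workqueue items; the same mechanical substitution recurs in the xgbe changes further down. A minimal sketch of the pattern, assuming the system_bh_wq bottom-half workqueue and the from_work() helper from <linux/workqueue.h> (the foo_* names are illustrative, not from the patch):

	#include <linux/interrupt.h>
	#include <linux/workqueue.h>

	struct foo_priv {
		struct work_struct bh_work;	/* was: struct tasklet_struct */
	};

	static void foo_bh_work(struct work_struct *work)
	{
		/* was: from_tasklet(p, t, tasklet) */
		struct foo_priv *p = from_work(p, work, bh_work);

		/* bottom-half processing is unchanged */
	}

	static irqreturn_t foo_irq(int irq, void *data)
	{
		struct foo_priv *p = data;

		queue_work(system_bh_wq, &p->bh_work);	/* was: tasklet_schedule() */
		return IRQ_HANDLED;
	}

	static void foo_open(struct foo_priv *p)
	{
		INIT_WORK(&p->bh_work, foo_bh_work);	/* was: tasklet_setup() */
	}

	static void foo_close(struct foo_priv *p)
	{
		/* was: tasklet_kill(); also waits for a running instance */
		cancel_work_sync(&p->bh_work);
	}

Work queued on system_bh_wq still executes in softirq context, so the conversion does not relax the execution guarantees the tasklet code relied on.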
@@ -667,8 +667,8 @@ struct ace_private struct rx_desc *rx_mini_ring; struct rx_desc *rx_return_ring; - int tasklet_pending, jumbo; - struct tasklet_struct ace_tasklet; + int bh_work_pending, jumbo; + struct work_struct ace_bh_work; struct event *evt_ring; @@ -776,7 +776,7 @@ static int ace_open(struct net_device *dev); static netdev_tx_t ace_start_xmit(struct sk_buff *skb, struct net_device *dev); static int ace_close(struct net_device *dev); -static void ace_tasklet(struct tasklet_struct *t); +static void ace_bh_work(struct work_struct *work); static void ace_dump_trace(struct ace_private *ap); static void ace_set_multicast_list(struct net_device *dev); static int ace_change_mtu(struct net_device *dev, int new_mtu); diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 6de0d590be34..9d9fa6559354 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -7,6 +7,21 @@ #define ENA_ADMIN_RSS_KEY_PARTS 10 +#define ENA_ADMIN_CUSTOMER_METRICS_SUPPORT_MASK 0x3F +#define ENA_ADMIN_CUSTOMER_METRICS_MIN_SUPPORT_MASK 0x1F + + /* customer metrics - in correlation with + * ENA_ADMIN_CUSTOMER_METRICS_SUPPORT_MASK + */ +enum ena_admin_customer_metrics_id { + ENA_ADMIN_BW_IN_ALLOWANCE_EXCEEDED = 0, + ENA_ADMIN_BW_OUT_ALLOWANCE_EXCEEDED = 1, + ENA_ADMIN_PPS_ALLOWANCE_EXCEEDED = 2, + ENA_ADMIN_CONNTRACK_ALLOWANCE_EXCEEDED = 3, + ENA_ADMIN_LINKLOCAL_ALLOWANCE_EXCEEDED = 4, + ENA_ADMIN_CONNTRACK_ALLOWANCE_AVAILABLE = 5, +}; + enum ena_admin_aq_opcode { ENA_ADMIN_CREATE_SQ = 1, ENA_ADMIN_DESTROY_SQ = 2, @@ -51,6 +66,9 @@ enum ena_admin_aq_feature_id { /* device capabilities */ enum ena_admin_aq_caps_id { ENA_ADMIN_ENI_STATS = 0, + /* ENA SRD customer metrics */ + ENA_ADMIN_ENA_SRD_INFO = 1, + ENA_ADMIN_CUSTOMER_METRICS = 2, }; enum ena_admin_placement_policy_type { @@ -99,6 +117,9 @@ enum ena_admin_get_stats_type { ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, /* extra HW stats for specific network interface */ ENA_ADMIN_GET_STATS_TYPE_ENI = 2, + /* extra HW stats for ENA SRD */ + ENA_ADMIN_GET_STATS_TYPE_ENA_SRD = 3, + ENA_ADMIN_GET_STATS_TYPE_CUSTOMER_METRICS = 4, }; enum ena_admin_get_stats_scope { @@ -106,6 +127,16 @@ enum ena_admin_get_stats_scope { ENA_ADMIN_ETH_TRAFFIC = 1, }; +/* ENA SRD configuration for ENI */ +enum ena_admin_ena_srd_flags { + /* Feature enabled */ + ENA_ADMIN_ENA_SRD_ENABLED = BIT(0), + /* UDP support enabled */ + ENA_ADMIN_ENA_SRD_UDP_ENABLED = BIT(1), + /* Bypass Rx UDP ordering */ + ENA_ADMIN_ENA_SRD_UDP_ORDERING_BYPASS_ENABLED = BIT(2), +}; + struct ena_admin_aq_common_desc { /* 11:0 : command_id * 15:12 : reserved12 @@ -363,6 +394,9 @@ struct ena_admin_aq_get_stats_cmd { * stats of other device */ u16 device_id; + + /* a bitmap representing the requested metric values */ + u64 requested_metrics; }; /* Basic Statistics Command. */ @@ -419,6 +453,40 @@ struct ena_admin_eni_stats { u64 linklocal_allowance_exceeded; }; +struct ena_admin_ena_srd_stats { + /* Number of packets transmitted over ENA SRD */ + u64 ena_srd_tx_pkts; + + /* Number of packets transmitted or could have been + * transmitted over ENA SRD + */ + u64 ena_srd_eligible_tx_pkts; + + /* Number of packets received over ENA SRD */ + u64 ena_srd_rx_pkts; + + /* Percentage of the ENA SRD resources that is in use */ + u64 ena_srd_resource_utilization; +}; + +/* ENA SRD Statistics Command */ +struct ena_admin_ena_srd_info { + /* ENA SRD configuration bitmap. 
See ena_admin_ena_srd_flags for + * details + */ + u64 flags; + + struct ena_admin_ena_srd_stats ena_srd_stats; +}; + +/* Customer Metrics Command. */ +struct ena_admin_customer_metrics { + /* A bitmap representing the reported customer metrics according to + * the order they are reported + */ + u64 reported_metrics; +}; + struct ena_admin_acq_get_stats_resp { struct ena_admin_acq_common_desc acq_common_desc; @@ -428,6 +496,10 @@ struct ena_admin_acq_get_stats_resp { struct ena_admin_basic_stats basic_stats; struct ena_admin_eni_stats eni_stats; + + struct ena_admin_ena_srd_info ena_srd_info; + + struct ena_admin_customer_metrics customer_metrics; } u; }; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 713a595370bf..d958cda9e58b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -1881,6 +1881,56 @@ int ena_com_get_link_params(struct ena_com_dev *ena_dev, return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG, 0); } +static int ena_get_dev_stats(struct ena_com_dev *ena_dev, + struct ena_com_stats_ctx *ctx, + enum ena_admin_get_stats_type type) +{ + struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp; + struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd; + struct ena_com_admin_queue *admin_queue; + int ret; + + admin_queue = &ena_dev->admin_queue; + + get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS; + get_cmd->aq_common_descriptor.flags = 0; + get_cmd->type = type; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)get_cmd, + sizeof(*get_cmd), + (struct ena_admin_acq_entry *)get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + netdev_err(ena_dev->net_device, "Failed to get stats. error: %d\n", ret); + + return ret; +} + +static void ena_com_set_supported_customer_metrics(struct ena_com_dev *ena_dev) +{ + struct ena_customer_metrics *customer_metrics; + struct ena_com_stats_ctx ctx; + int ret; + + customer_metrics = &ena_dev->customer_metrics; + if (!ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { + customer_metrics->supported_metrics = ENA_ADMIN_CUSTOMER_METRICS_MIN_SUPPORT_MASK; + return; + } + + memset(&ctx, 0x0, sizeof(ctx)); + ctx.get_cmd.requested_metrics = ENA_ADMIN_CUSTOMER_METRICS_SUPPORT_MASK; + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_CUSTOMER_METRICS); + if (likely(ret == 0)) + customer_metrics->supported_metrics = + ctx.get_resp.u.customer_metrics.reported_metrics; + else + netdev_err(ena_dev->net_device, + "Failed to query customer metrics support. 
error: %d\n", ret); +} + int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, struct ena_com_dev_get_features_ctx *get_feat_ctx) { @@ -1960,6 +2010,8 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, else return rc; + ena_com_set_supported_customer_metrics(ena_dev); + return 0; } @@ -2104,50 +2156,44 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev, return 0; } -static int ena_get_dev_stats(struct ena_com_dev *ena_dev, - struct ena_com_stats_ctx *ctx, - enum ena_admin_get_stats_type type) +int ena_com_get_eni_stats(struct ena_com_dev *ena_dev, + struct ena_admin_eni_stats *stats) { - struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd; - struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp; - struct ena_com_admin_queue *admin_queue; + struct ena_com_stats_ctx ctx; int ret; - admin_queue = &ena_dev->admin_queue; - - get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS; - get_cmd->aq_common_descriptor.flags = 0; - get_cmd->type = type; - - ret = ena_com_execute_admin_command(admin_queue, - (struct ena_admin_aq_entry *)get_cmd, - sizeof(*get_cmd), - (struct ena_admin_acq_entry *)get_resp, - sizeof(*get_resp)); + if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { + netdev_err(ena_dev->net_device, "Capability %d isn't supported\n", + ENA_ADMIN_ENI_STATS); + return -EOPNOTSUPP; + } - if (unlikely(ret)) - netdev_err(ena_dev->net_device, "Failed to get stats. error: %d\n", ret); + memset(&ctx, 0x0, sizeof(ctx)); + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_ENI); + if (likely(ret == 0)) + memcpy(stats, &ctx.get_resp.u.eni_stats, + sizeof(ctx.get_resp.u.eni_stats)); return ret; } -int ena_com_get_eni_stats(struct ena_com_dev *ena_dev, - struct ena_admin_eni_stats *stats) +int ena_com_get_ena_srd_info(struct ena_com_dev *ena_dev, + struct ena_admin_ena_srd_info *info) { struct ena_com_stats_ctx ctx; int ret; - if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { + if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENA_SRD_INFO)) { netdev_err(ena_dev->net_device, "Capability %d isn't supported\n", - ENA_ADMIN_ENI_STATS); + ENA_ADMIN_ENA_SRD_INFO); return -EOPNOTSUPP; } memset(&ctx, 0x0, sizeof(ctx)); - ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_ENI); + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_ENA_SRD); if (likely(ret == 0)) - memcpy(stats, &ctx.get_resp.u.eni_stats, - sizeof(ctx.get_resp.u.eni_stats)); + memcpy(info, &ctx.get_resp.u.ena_srd_info, + sizeof(ctx.get_resp.u.ena_srd_info)); return ret; } @@ -2167,6 +2213,50 @@ int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, return ret; } +int ena_com_get_customer_metrics(struct ena_com_dev *ena_dev, char *buffer, u32 len) +{ + struct ena_admin_aq_get_stats_cmd *get_cmd; + struct ena_com_stats_ctx ctx; + int ret; + + if (unlikely(len > ena_dev->customer_metrics.buffer_len)) { + netdev_err(ena_dev->net_device, + "Invalid buffer size %u. 
The given buffer is too big.\n", len); + return -EINVAL; + } + + if (!ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { + netdev_err(ena_dev->net_device, "Capability %d not supported.\n", + ENA_ADMIN_CUSTOMER_METRICS); + return -EOPNOTSUPP; + } + + if (!ena_dev->customer_metrics.supported_metrics) { + netdev_err(ena_dev->net_device, "No supported customer metrics.\n"); + return -EOPNOTSUPP; + } + + get_cmd = &ctx.get_cmd; + memset(&ctx, 0x0, sizeof(ctx)); + ret = ena_com_mem_addr_set(ena_dev, + &get_cmd->u.control_buffer.address, + ena_dev->customer_metrics.buffer_dma_addr); + if (unlikely(ret)) { + netdev_err(ena_dev->net_device, "Memory address set failed.\n"); + return ret; + } + + get_cmd->u.control_buffer.length = ena_dev->customer_metrics.buffer_len; + get_cmd->requested_metrics = ena_dev->customer_metrics.supported_metrics; + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_CUSTOMER_METRICS); + if (likely(ret == 0)) + memcpy(buffer, ena_dev->customer_metrics.buffer_virt_addr, len); + else + netdev_err(ena_dev->net_device, "Failed to get customer metrics. error: %d\n", ret); + + return ret; +} + int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu) { struct ena_com_admin_queue *admin_queue; @@ -2706,6 +2796,24 @@ int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, return 0; } +int ena_com_allocate_customer_metrics_buffer(struct ena_com_dev *ena_dev) +{ + struct ena_customer_metrics *customer_metrics = &ena_dev->customer_metrics; + + customer_metrics->buffer_len = ENA_CUSTOMER_METRICS_BUFFER_SIZE; + customer_metrics->buffer_virt_addr = NULL; + + customer_metrics->buffer_virt_addr = + dma_alloc_coherent(ena_dev->dmadev, customer_metrics->buffer_len, + &customer_metrics->buffer_dma_addr, GFP_KERNEL); + if (!customer_metrics->buffer_virt_addr) { + customer_metrics->buffer_len = 0; + return -ENOMEM; + } + + return 0; +} + void ena_com_delete_host_info(struct ena_com_dev *ena_dev) { struct ena_host_attribute *host_attr = &ena_dev->host_attr; @@ -2728,6 +2836,19 @@ void ena_com_delete_debug_area(struct ena_com_dev *ena_dev) } } +void ena_com_delete_customer_metrics_buffer(struct ena_com_dev *ena_dev) +{ + struct ena_customer_metrics *customer_metrics = &ena_dev->customer_metrics; + + if (customer_metrics->buffer_virt_addr) { + dma_free_coherent(ena_dev->dmadev, customer_metrics->buffer_len, + customer_metrics->buffer_virt_addr, + customer_metrics->buffer_dma_addr); + customer_metrics->buffer_virt_addr = NULL; + customer_metrics->buffer_len = 0; + } +} + int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) { struct ena_host_attribute *host_attr = &ena_dev->host_attr; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 924f03f5a6c7..a372c5e768a7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -42,6 +42,8 @@ #define ADMIN_CQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_acq_entry)) #define ADMIN_AENQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aenq_entry)) +#define ENA_CUSTOMER_METRICS_BUFFER_SIZE 512 + /*****************************************************************************/ /*****************************************************************************/ /* ENA adaptive interrupt moderation settings */ @@ -278,6 +280,16 @@ struct ena_rss { }; +struct ena_customer_metrics { + /* in correlation with ENA_ADMIN_CUSTOMER_METRICS_SUPPORT_MASK + * and ena_admin_customer_metrics_id + */ + u64 supported_metrics; + dma_addr_t 
buffer_dma_addr; + void *buffer_virt_addr; + u32 buffer_len; +}; + struct ena_host_attribute { /* Debug area */ u8 *debug_area_virt_addr; @@ -327,6 +339,8 @@ struct ena_com_dev { struct ena_intr_moder_entry *intr_moder_tbl; struct ena_com_llq_info llq_info; + + struct ena_customer_metrics customer_metrics; }; struct ena_com_dev_get_features_ctx { @@ -595,6 +609,24 @@ int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, int ena_com_get_eni_stats(struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); +/* ena_com_get_ena_srd_info - Get ENA SRD network interface statistics + * @ena_dev: ENA communication layer struct + * @info: ena srd stats and flags + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_ena_srd_info(struct ena_com_dev *ena_dev, + struct ena_admin_ena_srd_info *info); + +/* ena_com_get_customer_metrics - Get customer metrics for network interface + * @ena_dev: ENA communication layer struct + * @buffer: buffer for returned customer metrics + * @len: size of the buffer + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_customer_metrics(struct ena_com_dev *ena_dev, char *buffer, u32 len); + /* ena_com_set_dev_mtu - Configure the device mtu. * @ena_dev: ENA communication layer struct * @mtu: mtu value @@ -805,6 +837,13 @@ int ena_com_allocate_host_info(struct ena_com_dev *ena_dev); int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, u32 debug_area_size); +/* ena_com_allocate_customer_metrics_buffer - Allocate customer metrics resources. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_customer_metrics_buffer(struct ena_com_dev *ena_dev); + /* ena_com_delete_debug_area - Free the debug area resources. * @ena_dev: ENA communication layer struct * @@ -819,6 +858,13 @@ void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); */ void ena_com_delete_host_info(struct ena_com_dev *ena_dev); +/* ena_com_delete_customer_metrics_buffer - Free the customer metrics resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocated customer metrics area. + */ +void ena_com_delete_customer_metrics_buffer(struct ena_com_dev *ena_dev); + /* ena_com_set_host_attributes - Update the device with the host * attributes (debug area and host info) base address. * @ena_dev: ENA communication layer struct @@ -975,6 +1021,28 @@ static inline bool ena_com_get_cap(struct ena_com_dev *ena_dev, return !!(ena_dev->capabilities & BIT(cap_id)); } +/* ena_com_get_customer_metric_support - query whether device supports a given customer metric. + * @ena_dev: ENA communication layer struct + * @metric_id: enum value representing the customer metric + * + * @return - true if customer metric is supported or false otherwise + */ +static inline bool ena_com_get_customer_metric_support(struct ena_com_dev *ena_dev, + enum ena_admin_customer_metrics_id metric_id) +{ + return !!(ena_dev->customer_metrics.supported_metrics & BIT(metric_id)); +} + +/* ena_com_get_customer_metric_count - return the number of supported customer metrics. + * @ena_dev: ENA communication layer struct + * + * @return - the number of supported customer metrics + */ +static inline int ena_com_get_customer_metric_count(struct ena_com_dev *ena_dev) +{ + return hweight64(ena_dev->customer_metrics.supported_metrics); +} + /* ena_com_update_intr_reg - Prepare interrupt register * @intr_reg: interrupt register to update. 
* @rx_delay_interval: Rx interval in usecs diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index b24cc3f05248..60fb35ec4b15 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -14,6 +14,10 @@ struct ena_stats { int stat_offset; }; +struct ena_hw_metrics { + char name[ETH_GSTRING_LEN]; +}; + #define ENA_STAT_ENA_COM_ENTRY(stat) { \ .name = #stat, \ .stat_offset = offsetof(struct ena_com_stats_admin, stat) / sizeof(u64) \ @@ -41,6 +45,18 @@ struct ena_stats { #define ENA_STAT_ENI_ENTRY(stat) \ ENA_STAT_HW_ENTRY(stat, eni_stats) +#define ENA_STAT_ENA_SRD_ENTRY(stat) \ + ENA_STAT_HW_ENTRY(stat, ena_srd_stats) + +#define ENA_STAT_ENA_SRD_MODE_ENTRY(stat) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_admin_ena_srd_info, flags) / sizeof(u64) \ +} + +#define ENA_METRIC_ENI_ENTRY(stat) { \ + .name = #stat \ +} + static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(tx_timeout), ENA_STAT_GLOBAL_ENTRY(suspend), @@ -52,6 +68,9 @@ static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(reset_fail), }; +/* A partial list of hw stats. Used when admin command + * with type ENA_ADMIN_GET_STATS_TYPE_CUSTOMER_METRICS is not supported + */ static const struct ena_stats ena_stats_eni_strings[] = { ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded), ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded), @@ -60,6 +79,23 @@ static const struct ena_stats ena_stats_eni_strings[] = { ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded), }; +static const struct ena_hw_metrics ena_hw_stats_strings[] = { + ENA_METRIC_ENI_ENTRY(bw_in_allowance_exceeded), + ENA_METRIC_ENI_ENTRY(bw_out_allowance_exceeded), + ENA_METRIC_ENI_ENTRY(pps_allowance_exceeded), + ENA_METRIC_ENI_ENTRY(conntrack_allowance_exceeded), + ENA_METRIC_ENI_ENTRY(linklocal_allowance_exceeded), + ENA_METRIC_ENI_ENTRY(conntrack_allowance_available), +}; + +static const struct ena_stats ena_srd_info_strings[] = { + ENA_STAT_ENA_SRD_MODE_ENTRY(ena_srd_mode), + ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts), + ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts), + ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts), + ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization) +}; + static const struct ena_stats ena_stats_tx_strings[] = { ENA_STAT_TX_ENTRY(cnt), ENA_STAT_TX_ENTRY(bytes), @@ -112,7 +148,9 @@ static const struct ena_stats ena_stats_ena_com_strings[] = { #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) #define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings) -#define ENA_STATS_ARRAY_ENI(adapter) ARRAY_SIZE(ena_stats_eni_strings) +#define ENA_STATS_ARRAY_ENI ARRAY_SIZE(ena_stats_eni_strings) +#define ENA_STATS_ARRAY_ENA_SRD ARRAY_SIZE(ena_srd_info_strings) +#define ENA_METRICS_ARRAY_ENI ARRAY_SIZE(ena_hw_stats_strings) static void ena_safe_update_stat(u64 *src, u64 *dst, struct u64_stats_sync *syncp) @@ -125,6 +163,57 @@ static void ena_safe_update_stat(u64 *src, u64 *dst, } while (u64_stats_fetch_retry(syncp, start)); } +static void ena_metrics_stats(struct ena_adapter *adapter, u64 **data) +{ + struct ena_com_dev *dev = adapter->ena_dev; + const struct ena_stats *ena_stats; + u64 *ptr; + int i; + + if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS)) { + u32 supported_metrics_count; + int len; + + supported_metrics_count = ena_com_get_customer_metric_count(dev); + len = supported_metrics_count * sizeof(u64); + + /* Fill 
the data buffer, and advance its pointer */ + ena_com_get_customer_metrics(dev, (char *)(*data), len); + (*data) += supported_metrics_count; + + } else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)) { + ena_com_get_eni_stats(dev, &adapter->eni_stats); + /* Updating regardless of rc - once we told ethtool how many stats we have + * it will print that much stats. We can't leave holes in the stats + */ + for (i = 0; i < ENA_STATS_ARRAY_ENI; i++) { + ena_stats = &ena_stats_eni_strings[i]; + + ptr = (u64 *)&adapter->eni_stats + + ena_stats->stat_offset; + + ena_safe_update_stat(ptr, (*data)++, &adapter->syncp); + } + } + + if (ena_com_get_cap(dev, ENA_ADMIN_ENA_SRD_INFO)) { + ena_com_get_ena_srd_info(dev, &adapter->ena_srd_info); + /* Get ENA SRD mode */ + ptr = (u64 *)&adapter->ena_srd_info; + ena_safe_update_stat(ptr, (*data)++, &adapter->syncp); + for (i = 1; i < ENA_STATS_ARRAY_ENA_SRD; i++) { + ena_stats = &ena_srd_info_strings[i]; + /* Wrapped within an outer struct - need to accommodate an + * additional offset of the ENA SRD mode that was already processed + */ + ptr = (u64 *)&adapter->ena_srd_info + + ena_stats->stat_offset + 1; + + ena_safe_update_stat(ptr, (*data)++, &adapter->syncp); + } + } +} + static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) { const struct ena_stats *ena_stats; @@ -179,7 +268,7 @@ static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data) static void ena_get_stats(struct ena_adapter *adapter, u64 *data, - bool eni_stats_needed) + bool hw_stats_needed) { const struct ena_stats *ena_stats; u64 *ptr; @@ -193,17 +282,8 @@ static void ena_get_stats(struct ena_adapter *adapter, ena_safe_update_stat(ptr, data++, &adapter->syncp); } - if (eni_stats_needed) { - ena_update_hw_stats(adapter); - for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) { - ena_stats = &ena_stats_eni_strings[i]; - - ptr = (u64 *)&adapter->eni_stats + - ena_stats->stat_offset; - - ena_safe_update_stat(ptr, data++, &adapter->syncp); - } - } + if (hw_stats_needed) + ena_metrics_stats(adapter, &data); ena_queue_stats(adapter, &data); ena_dev_admin_queue_stats(adapter, &data); @@ -214,9 +294,8 @@ static void ena_get_ethtool_stats(struct net_device *netdev, u64 *data) { struct ena_adapter *adapter = netdev_priv(netdev); - struct ena_com_dev *dev = adapter->ena_dev; - ena_get_stats(adapter, data, ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)); + ena_get_stats(adapter, data, true); } static int ena_get_sw_stats_count(struct ena_adapter *adapter) @@ -228,9 +307,17 @@ static int ena_get_sw_stats_count(struct ena_adapter *adapter) static int ena_get_hw_stats_count(struct ena_adapter *adapter) { - bool supported = ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS); + struct ena_com_dev *dev = adapter->ena_dev; + int count; + + count = ENA_STATS_ARRAY_ENA_SRD * ena_com_get_cap(dev, ENA_ADMIN_ENA_SRD_INFO); - return ENA_STATS_ARRAY_ENI(adapter) * supported; + if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS)) + count += ena_com_get_customer_metric_count(dev); + else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)) + count += ENA_STATS_ARRAY_ENI; + + return count; } int ena_get_sset_count(struct net_device *netdev, int sset) @@ -246,6 +333,35 @@ int ena_get_sset_count(struct net_device *netdev, int sset) return -EOPNOTSUPP; } +static void ena_metrics_stats_strings(struct ena_adapter *adapter, u8 **data) +{ + struct ena_com_dev *dev = adapter->ena_dev; + const struct ena_hw_metrics *ena_metrics; + const struct ena_stats *ena_stats; + int i; + + if (ena_com_get_cap(dev, 
ENA_ADMIN_CUSTOMER_METRICS)) { + for (i = 0; i < ENA_METRICS_ARRAY_ENI; i++) { + if (ena_com_get_customer_metric_support(dev, i)) { + ena_metrics = &ena_hw_stats_strings[i]; + ethtool_puts(data, ena_metrics->name); + } + } + } else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)) { + for (i = 0; i < ENA_STATS_ARRAY_ENI; i++) { + ena_stats = &ena_stats_eni_strings[i]; + ethtool_puts(data, ena_stats->name); + } + } + + if (ena_com_get_cap(dev, ENA_ADMIN_ENA_SRD_INFO)) { + for (i = 0; i < ENA_STATS_ARRAY_ENA_SRD; i++) { + ena_stats = &ena_srd_info_strings[i]; + ethtool_puts(data, ena_stats->name); + } + } +} + static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) { const struct ena_stats *ena_stats; @@ -291,7 +407,7 @@ static void ena_com_dev_strings(u8 **data) static void ena_get_strings(struct ena_adapter *adapter, u8 *data, - bool eni_stats_needed) + bool hw_stats_needed) { const struct ena_stats *ena_stats; int i; @@ -301,12 +417,8 @@ static void ena_get_strings(struct ena_adapter *adapter, ethtool_puts(&data, ena_stats->name); } - if (eni_stats_needed) { - for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) { - ena_stats = &ena_stats_eni_strings[i]; - ethtool_puts(&data, ena_stats->name); - } - } + if (hw_stats_needed) + ena_metrics_stats_strings(adapter, &data); ena_queue_strings(adapter, &data); ena_com_dev_strings(&data); @@ -317,11 +429,10 @@ static void ena_get_ethtool_strings(struct net_device *netdev, u8 *data) { struct ena_adapter *adapter = netdev_priv(netdev); - struct ena_com_dev *dev = adapter->ena_dev; switch (sset) { case ETH_SS_STATS: - ena_get_strings(adapter, data, ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)); + ena_get_strings(adapter, data, true); break; } } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 184b6e6cbed4..c5b50cfa935a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2798,19 +2798,6 @@ err: ena_com_delete_debug_area(adapter->ena_dev); } -int ena_update_hw_stats(struct ena_adapter *adapter) -{ - int rc; - - rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats); - if (rc) { - netdev_err(adapter->netdev, "Failed to get ENI stats\n"); - return rc; - } - - return 0; -} - static void ena_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -3944,10 +3931,16 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, adapter); + rc = ena_com_allocate_customer_metrics_buffer(ena_dev); + if (rc) { + netdev_err(netdev, "ena_com_allocate_customer_metrics_buffer failed\n"); + goto err_netdev_destroy; + } + rc = ena_map_llq_mem_bar(pdev, ena_dev, bars); if (rc) { dev_err(&pdev->dev, "ENA LLQ bar mapping failed\n"); - goto err_netdev_destroy; + goto err_metrics_destroy; } rc = ena_device_init(adapter, pdev, &get_feat_ctx, &wd_state); @@ -3955,7 +3948,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "ENA device init failed\n"); if (rc == -ETIME) rc = -EPROBE_DEFER; - goto err_netdev_destroy; + goto err_metrics_destroy; } /* Initial TX and RX interrupt delay. Assumes 1 usec granularity. 
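A recurring invariant in the ethtool changes above: the count reported by ena_get_hw_stats_count(), the strings, and the values must all derive from the same supported-metrics bitmap, because ethtool prints exactly as many entries as the driver announced. A small sketch of that bitmap-driven pattern, with illustrative names (metric_names and the mask layout here are assumptions, not the driver's tables):

	#include <linux/bitops.h>
	#include <linux/ethtool.h>
	#include <linux/kernel.h>

	/* one bit per metric; bit position == reporting order */
	static const char * const metric_names[] = {
		"bw_in_allowance_exceeded",
		"bw_out_allowance_exceeded",
		"pps_allowance_exceeded",
		"conntrack_allowance_exceeded",
		"linklocal_allowance_exceeded",
		"conntrack_allowance_available",
	};

	static u32 metric_count(u64 supported)
	{
		return hweight64(supported);	/* feeds get_sset_count() */
	}

	static void metric_strings(u64 supported, u8 **data)
	{
		int i;

		/* emit names for supported bits only, in bit order, so the
		 * string table lines up with the values the device returns
		 */
		for (i = 0; i < ARRAY_SIZE(metric_names); i++)
			if (supported & BIT(i))
				ethtool_puts(data, metric_names[i]);
	}

The value side then copies hweight64(supported) u64 entries straight out of the DMA buffer, which is why ena_metrics_stats() advances *data by supported_metrics_count rather than by a fixed array size.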
@@ -4076,6 +4069,8 @@ err_worker_destroy: err_device_destroy: ena_com_delete_host_info(ena_dev); ena_com_admin_destroy(ena_dev); +err_metrics_destroy: + ena_com_delete_customer_metrics_buffer(ena_dev); err_netdev_destroy: free_netdev(netdev); err_free_region: @@ -4139,6 +4134,8 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) ena_com_delete_host_info(ena_dev); + ena_com_delete_customer_metrics_buffer(ena_dev); + ena_release_bars(ena_dev, pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index d59509747d1a..6e12ae3b12e5 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -373,6 +373,7 @@ struct ena_adapter { struct u64_stats_sync syncp; struct ena_stats_dev dev_stats; struct ena_admin_eni_stats eni_stats; + struct ena_admin_ena_srd_info ena_srd_info; /* last queue index that was checked for uncompleted tx packets */ u32 last_monitored_tx_qid; @@ -390,7 +391,6 @@ void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); -int ena_update_hw_stats(struct ena_adapter *adapter); int ena_update_queue_params(struct ena_adapter *adapter, u32 new_tx_size, diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index 6bdd02b7aa6d..ac37a4e738ae 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -112,7 +112,7 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq) struct pdsc_cq *cq = &qcq->cq; qcq_dentry = debugfs_create_dir(q->name, pdsc->dentry); - if (IS_ERR_OR_NULL(qcq_dentry)) + if (IS_ERR(qcq_dentry)) return; qcq->dentry = qcq_dentry; @@ -123,7 +123,7 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq) debugfs_create_x32("accum_work", 0400, qcq_dentry, &qcq->accum_work); q_dentry = debugfs_create_dir("q", qcq->dentry); - if (IS_ERR_OR_NULL(q_dentry)) + if (IS_ERR(q_dentry)) return; debugfs_create_u32("index", 0400, q_dentry, &q->index); @@ -135,7 +135,7 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq) debugfs_create_u16("head", 0400, q_dentry, &q->head_idx); cq_dentry = debugfs_create_dir("cq", qcq->dentry); - if (IS_ERR_OR_NULL(cq_dentry)) + if (IS_ERR(cq_dentry)) return; debugfs_create_x64("base_pa", 0400, cq_dentry, &cq->base_pa); @@ -148,7 +148,7 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq) struct pdsc_intr_info *intr = &pdsc->intr_info[qcq->intx]; intr_dentry = debugfs_create_dir("intr", qcq->dentry); - if (IS_ERR_OR_NULL(intr_dentry)) + if (IS_ERR(intr_dentry)) return; debugfs_create_u32("index", 0400, intr_dentry, &intr->index); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index c4a4e316683f..5475867708f4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -403,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period, return false; } -static void xgbe_ecc_isr_task(struct tasklet_struct *t) +static void xgbe_ecc_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_ecc); + struct xgbe_prv_data *pdata = from_work(pdata, work, ecc_bh_work); unsigned int ecc_isr; bool stop = false; @@ -465,17 +465,17 @@ static irqreturn_t xgbe_ecc_isr(int irq, void *data) { struct xgbe_prv_data *pdata = data; - if 
(pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_ecc); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->ecc_bh_work); else - xgbe_ecc_isr_task(&pdata->tasklet_ecc); + xgbe_ecc_isr_bh_work(&pdata->ecc_bh_work); return IRQ_HANDLED; } -static void xgbe_isr_task(struct tasklet_struct *t) +static void xgbe_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_dev); + struct xgbe_prv_data *pdata = from_work(pdata, work, dev_bh_work); struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; unsigned int dma_isr, dma_ch_isr; @@ -582,7 +582,7 @@ isr_done: /* If there is not a separate ECC irq, handle it here */ if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq)) - xgbe_ecc_isr_task(&pdata->tasklet_ecc); + xgbe_ecc_isr_bh_work(&pdata->ecc_bh_work); /* If there is not a separate I2C irq, handle it here */ if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq)) @@ -604,10 +604,10 @@ static irqreturn_t xgbe_isr(int irq, void *data) { struct xgbe_prv_data *pdata = data; - if (pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_dev); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->dev_bh_work); else - xgbe_isr_task(&pdata->tasklet_dev); + xgbe_isr_bh_work(&pdata->dev_bh_work); return IRQ_HANDLED; } @@ -1007,8 +1007,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) unsigned int i; int ret; - tasklet_setup(&pdata->tasklet_dev, xgbe_isr_task); - tasklet_setup(&pdata->tasklet_ecc, xgbe_ecc_isr_task); + INIT_WORK(&pdata->dev_bh_work, xgbe_isr_bh_work); + INIT_WORK(&pdata->ecc_bh_work, xgbe_ecc_isr_bh_work); ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, netdev_name(netdev), pdata); @@ -1078,8 +1078,8 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) devm_free_irq(pdata->dev, pdata->dev_irq, pdata); - tasklet_kill(&pdata->tasklet_dev); - tasklet_kill(&pdata->tasklet_ecc); + cancel_work_sync(&pdata->dev_bh_work); + cancel_work_sync(&pdata->ecc_bh_work); if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 21407a26f806..5fc94c2f638e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -582,16 +582,12 @@ static int xgbe_get_ts_info(struct net_device *netdev, struct xgbe_prv_data *pdata = netdev_priv(netdev); ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (pdata->ptp_clock) ts_info->phc_index = ptp_clock_index(pdata->ptp_clock); - else - ts_info->phc_index = -1; ts_info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index a9ccc4258ee5..7a833894f52a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -274,9 +274,9 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata, XI2C_IOREAD(pdata, IC_CLR_STOP_DET); } -static void xgbe_i2c_isr_task(struct tasklet_struct *t) +static void xgbe_i2c_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_i2c); + struct 
xgbe_prv_data *pdata = from_work(pdata, work, i2c_bh_work); struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; unsigned int isr; @@ -321,10 +321,10 @@ static irqreturn_t xgbe_i2c_isr(int irq, void *data) { struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; - if (pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_i2c); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->i2c_bh_work); else - xgbe_i2c_isr_task(&pdata->tasklet_i2c); + xgbe_i2c_isr_bh_work(&pdata->i2c_bh_work); return IRQ_HANDLED; } @@ -369,7 +369,7 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr) static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata) { - xgbe_i2c_isr_task(&pdata->tasklet_i2c); + xgbe_i2c_isr_bh_work(&pdata->i2c_bh_work); return IRQ_HANDLED; } @@ -449,7 +449,7 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) if (pdata->dev_irq != pdata->i2c_irq) { devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); - tasklet_kill(&pdata->tasklet_i2c); + cancel_work_sync(&pdata->i2c_bh_work); } } @@ -464,7 +464,7 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata) /* If we have a separate I2C irq, enable it */ if (pdata->dev_irq != pdata->i2c_irq) { - tasklet_setup(&pdata->tasklet_i2c, xgbe_i2c_isr_task); + INIT_WORK(&pdata->i2c_bh_work, xgbe_i2c_isr_bh_work); ret = devm_request_irq(pdata->dev, pdata->i2c_irq, xgbe_i2c_isr, 0, pdata->i2c_name, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 4a2dc705b528..07f4f3418d01 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -703,9 +703,9 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata) } } -static void xgbe_an_isr_task(struct tasklet_struct *t) +static void xgbe_an_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_an); + struct xgbe_prv_data *pdata = from_work(pdata, work, an_bh_work); netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); @@ -727,17 +727,17 @@ static irqreturn_t xgbe_an_isr(int irq, void *data) { struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; - if (pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_an); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->an_bh_work); else - xgbe_an_isr_task(&pdata->tasklet_an); + xgbe_an_isr_bh_work(&pdata->an_bh_work); return IRQ_HANDLED; } static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata) { - xgbe_an_isr_task(&pdata->tasklet_an); + xgbe_an_isr_bh_work(&pdata->an_bh_work); return IRQ_HANDLED; } @@ -1454,7 +1454,7 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) if (pdata->dev_irq != pdata->an_irq) { devm_free_irq(pdata->dev, pdata->an_irq, pdata); - tasklet_kill(&pdata->tasklet_an); + cancel_work_sync(&pdata->an_bh_work); } pdata->phy_if.phy_impl.stop(pdata); @@ -1477,7 +1477,7 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) /* If we have a separate AN irq, enable it */ if (pdata->dev_irq != pdata->an_irq) { - tasklet_setup(&pdata->tasklet_an, xgbe_an_isr_task); + INIT_WORK(&pdata->an_bh_work, xgbe_an_isr_bh_work); ret = devm_request_irq(pdata->dev, pdata->an_irq, xgbe_an_isr, 0, pdata->an_name, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index c5e5fac49779..c636999a6a84 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -139,7 +139,7 @@ static int xgbe_config_multi_msi(struct 
xgbe_prv_data *pdata) return ret; } - pdata->isr_as_tasklet = 1; + pdata->isr_as_bh_work = 1; pdata->irq_count = ret; pdata->dev_irq = pci_irq_vector(pdata->pcidev, 0); @@ -176,7 +176,7 @@ static int xgbe_config_irqs(struct xgbe_prv_data *pdata) return ret; } - pdata->isr_as_tasklet = pdata->pcidev->msi_enabled ? 1 : 0; + pdata->isr_as_bh_work = pdata->pcidev->msi_enabled ? 1 : 0; pdata->irq_count = 1; pdata->channel_irq_count = 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index f01a1e566da6..d85386cac8d1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1298,11 +1298,11 @@ struct xgbe_prv_data { unsigned int lpm_ctrl; /* CTRL1 for resume */ - unsigned int isr_as_tasklet; - struct tasklet_struct tasklet_dev; - struct tasklet_struct tasklet_ecc; - struct tasklet_struct tasklet_i2c; - struct tasklet_struct tasklet_an; + unsigned int isr_as_bh_work; + struct work_struct dev_bh_work; + struct work_struct ecc_bh_work; + struct work_struct i2c_bh_work; + struct work_struct an_bh_work; struct dentry *xgbe_debugfs; diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index 292b1f9cd9e7..785f4b4ff758 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -1317,7 +1317,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) timer_setup(&bp->tx_timeout, bmac_tx_timeout, 0); - ret = request_irq(dev->irq, bmac_misc_intr, 0, "BMAC-misc", dev); + ret = request_irq(dev->irq, bmac_misc_intr, IRQF_NO_AUTOEN, "BMAC-misc", dev); if (ret) { printk(KERN_ERR "BMAC: can't get irq %d\n", dev->irq); goto err_out_iounmap_rx; @@ -1336,7 +1336,6 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) /* Mask chip interrupts and disable chip, will be * re-enabled on open() */ - disable_irq(dev->irq); pmac_call_feature(PMAC_FTR_BMAC_ENABLE, macio_get_of_node(bp->mdev), 0, 0); if (register_netdev(dev) != 0) { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index d0aecd1d7357..440ff4616fec 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -266,7 +266,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev, const int rx_stat_cnt = ARRAY_SIZE(aq_ethtool_queue_rx_stat_names); const int tx_stat_cnt = ARRAY_SIZE(aq_ethtool_queue_tx_stat_names); char tc_string[8]; - int tc; + unsigned int tc; memset(tc_string, 0, sizeof(tc_string)); memcpy(p, aq_ethtool_stat_names, @@ -275,22 +275,20 @@ static void aq_ethtool_get_strings(struct net_device *ndev, for (tc = 0; tc < cfg->tcs; tc++) { if (cfg->is_qos) - snprintf(tc_string, 8, "TC%d ", tc); + snprintf(tc_string, 8, "TC%u ", tc); for (i = 0; i < cfg->vecs; i++) { for (si = 0; si < rx_stat_cnt; si++) { - snprintf(p, ETH_GSTRING_LEN, + ethtool_sprintf(&p, aq_ethtool_queue_rx_stat_names[si], tc_string, AQ_NIC_CFG_TCVEC2RING(cfg, tc, i)); - p += ETH_GSTRING_LEN; } for (si = 0; si < tx_stat_cnt; si++) { - snprintf(p, ETH_GSTRING_LEN, + ethtool_sprintf(&p, aq_ethtool_queue_tx_stat_names[si], tc_string, AQ_NIC_CFG_TCVEC2RING(cfg, tc, i)); - p += ETH_GSTRING_LEN; } } } @@ -305,20 +303,18 @@ static void aq_ethtool_get_strings(struct net_device *ndev, for (i = 0; i < max(rx_ring_cnt, tx_ring_cnt); i++) { for (si = 0; si < rx_stat_cnt; si++) { - snprintf(p, ETH_GSTRING_LEN, + ethtool_sprintf(&p, aq_ethtool_queue_rx_stat_names[si], 
tc_string, i ? PTP_HWST_RING_IDX : ptp_ring_idx); - p += ETH_GSTRING_LEN; } if (i >= tx_ring_cnt) continue; for (si = 0; si < tx_stat_cnt; si++) { - snprintf(p, ETH_GSTRING_LEN, + ethtool_sprintf(&p, aq_ethtool_queue_tx_stat_names[si], tc_string, i ? PTP_HWST_RING_IDX : ptp_ring_idx); - p += ETH_GSTRING_LEN; } } } @@ -338,9 +334,8 @@ static void aq_ethtool_get_strings(struct net_device *ndev, for (si = 0; si < ARRAY_SIZE(aq_macsec_txsc_stat_names); si++) { - snprintf(p, ETH_GSTRING_LEN, + ethtool_sprintf(&p, aq_macsec_txsc_stat_names[si], i); - p += ETH_GSTRING_LEN; } aq_txsc = &nic->macsec_cfg->aq_txsc[i]; for (sa = 0; sa < MACSEC_NUM_AN; sa++) { @@ -349,10 +344,9 @@ static void aq_ethtool_get_strings(struct net_device *ndev, for (si = 0; si < ARRAY_SIZE(aq_macsec_txsa_stat_names); si++) { - snprintf(p, ETH_GSTRING_LEN, + ethtool_sprintf(&p, aq_macsec_txsa_stat_names[si], i, sa); - p += ETH_GSTRING_LEN; } } } @@ -369,10 +363,9 @@ static void aq_ethtool_get_strings(struct net_device *ndev, for (si = 0; si < ARRAY_SIZE(aq_macsec_rxsa_stat_names); si++) { - snprintf(p, ETH_GSTRING_LEN, + ethtool_sprintf(&p, aq_macsec_rxsa_stat_names[si], i, sa); - p += ETH_GSTRING_LEN; } } } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index f7433abd6591..f21de0c21e52 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -557,7 +557,7 @@ static int __aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, } frag_cnt++; - next_ = buff_->next, + next_ = buff_->next; buff_ = &self->buff_ring[next_]; is_rsc_completed = aq_ring_dx_in_range(self->sw_head, @@ -583,7 +583,7 @@ static int __aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, err = -EIO; goto err_exit; } - next_ = buff_->next, + next_ = buff_->next; buff_ = &self->buff_ring[next_]; buff_->is_cleaned = true; diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig index 482c58c4c584..bec5cdf8d1da 100644 --- a/drivers/net/ethernet/atheros/Kconfig +++ b/drivers/net/ethernet/atheros/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_ATHEROS bool "Atheros devices" default y - depends on (PCI || ATH79) + depends on PCI || ATH79 || COMPILE_TEST help If you have a network (Ethernet) card belonging to this class, say Y. 
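The aq_ethtool rework above leans on the ethtool string helpers, which write at most ETH_GSTRING_LEN bytes and advance the cursor themselves, replacing the manual snprintf()/p += ETH_GSTRING_LEN pairs. Roughly, with a hypothetical stat name:

	#include <linux/ethtool.h>

	static void foo_queue_strings(u8 **p, int queue)
	{
		/* old open-coded pattern:
		 *	snprintf(*p, ETH_GSTRING_LEN, "Queue[%d] InPackets", queue);
		 *	*p += ETH_GSTRING_LEN;
		 */
		ethtool_sprintf(p, "Queue[%d] InPackets", queue);

		/* for fixed strings, ethtool_puts() avoids passing a
		 * non-literal as a format string
		 */
		ethtool_puts(p, "restart_queue");
	}

The ag71xx ethtool_puts() conversion further down is the fixed-string case of the same helper.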
@@ -19,7 +19,7 @@ if NET_VENDOR_ATHEROS config AG71XX tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support" - depends on ATH79 + depends on ATH79 || COMPILE_TEST select PHYLINK imply NET_SELFTESTS help diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index a38be924cdaa..9586b6894f7e 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -149,11 +149,11 @@ #define FIFO_CFG4_MC BIT(8) /* Multicast Packet */ #define FIFO_CFG4_BC BIT(9) /* Broadcast Packet */ #define FIFO_CFG4_DR BIT(10) /* Dribble */ -#define FIFO_CFG4_LE BIT(11) /* Long Event */ -#define FIFO_CFG4_CF BIT(12) /* Control Frame */ -#define FIFO_CFG4_PF BIT(13) /* Pause Frame */ -#define FIFO_CFG4_UO BIT(14) /* Unsupported Opcode */ -#define FIFO_CFG4_VT BIT(15) /* VLAN tag detected */ +#define FIFO_CFG4_CF BIT(11) /* Control Frame */ +#define FIFO_CFG4_PF BIT(12) /* Pause Frame */ +#define FIFO_CFG4_UO BIT(13) /* Unsupported Opcode */ +#define FIFO_CFG4_VT BIT(14) /* VLAN tag detected */ +#define FIFO_CFG4_LE BIT(15) /* Long Event */ #define FIFO_CFG4_FT BIT(16) /* Frame Truncated */ #define FIFO_CFG4_UC BIT(17) /* Unicast Packet */ #define FIFO_CFG4_INIT (FIFO_CFG4_DE | FIFO_CFG4_DV | FIFO_CFG4_FC | \ @@ -168,28 +168,28 @@ #define FIFO_CFG5_DV BIT(1) /* RX_DV Event */ #define FIFO_CFG5_FC BIT(2) /* False Carrier */ #define FIFO_CFG5_CE BIT(3) /* Code Error */ -#define FIFO_CFG5_LM BIT(4) /* Length Mismatch */ -#define FIFO_CFG5_LO BIT(5) /* Length Out of Range */ -#define FIFO_CFG5_OK BIT(6) /* Packet is OK */ -#define FIFO_CFG5_MC BIT(7) /* Multicast Packet */ -#define FIFO_CFG5_BC BIT(8) /* Broadcast Packet */ -#define FIFO_CFG5_DR BIT(9) /* Dribble */ -#define FIFO_CFG5_CF BIT(10) /* Control Frame */ -#define FIFO_CFG5_PF BIT(11) /* Pause Frame */ -#define FIFO_CFG5_UO BIT(12) /* Unsupported Opcode */ -#define FIFO_CFG5_VT BIT(13) /* VLAN tag detected */ -#define FIFO_CFG5_LE BIT(14) /* Long Event */ -#define FIFO_CFG5_FT BIT(15) /* Frame Truncated */ -#define FIFO_CFG5_16 BIT(16) /* unknown */ -#define FIFO_CFG5_17 BIT(17) /* unknown */ +#define FIFO_CFG5_CR BIT(4) /* CRC error */ +#define FIFO_CFG5_LM BIT(5) /* Length Mismatch */ +#define FIFO_CFG5_LO BIT(6) /* Length Out of Range */ +#define FIFO_CFG5_OK BIT(7) /* Packet is OK */ +#define FIFO_CFG5_MC BIT(8) /* Multicast Packet */ +#define FIFO_CFG5_BC BIT(9) /* Broadcast Packet */ +#define FIFO_CFG5_DR BIT(10) /* Dribble */ +#define FIFO_CFG5_CF BIT(11) /* Control Frame */ +#define FIFO_CFG5_PF BIT(12) /* Pause Frame */ +#define FIFO_CFG5_UO BIT(13) /* Unsupported Opcode */ +#define FIFO_CFG5_VT BIT(14) /* VLAN tag detected */ +#define FIFO_CFG5_LE BIT(15) /* Long Event */ +#define FIFO_CFG5_FT BIT(16) /* Frame Truncated */ +#define FIFO_CFG5_UC BIT(17) /* Unicast Packet */ #define FIFO_CFG5_SF BIT(18) /* Short Frame */ #define FIFO_CFG5_BM BIT(19) /* Byte Mode */ #define FIFO_CFG5_INIT (FIFO_CFG5_DE | FIFO_CFG5_DV | FIFO_CFG5_FC | \ - FIFO_CFG5_CE | FIFO_CFG5_LO | FIFO_CFG5_OK | \ - FIFO_CFG5_MC | FIFO_CFG5_BC | FIFO_CFG5_DR | \ - FIFO_CFG5_CF | FIFO_CFG5_PF | FIFO_CFG5_VT | \ - FIFO_CFG5_LE | FIFO_CFG5_FT | FIFO_CFG5_16 | \ - FIFO_CFG5_17 | FIFO_CFG5_SF) + FIFO_CFG5_CE | FIFO_CFG5_LM | FIFO_CFG5_LO | \ + FIFO_CFG5_OK | FIFO_CFG5_MC | FIFO_CFG5_BC | \ + FIFO_CFG5_DR | FIFO_CFG5_CF | FIFO_CFG5_UO | \ + FIFO_CFG5_VT | FIFO_CFG5_LE | FIFO_CFG5_FT | \ + FIFO_CFG5_UC | FIFO_CFG5_SF) #define AG71XX_REG_TX_CTRL 0x0180 #define TX_CTRL_TXE BIT(0) /* Tx Enable */ @@ -379,10 +379,7 @@ 
struct ag71xx { u32 fifodata[3]; int mac_idx; - struct reset_control *mdio_reset; - struct mii_bus *mii_bus; struct clk *clk_mdio; - struct clk *clk_eth; }; static int ag71xx_desc_empty(struct ag71xx_desc *desc) @@ -447,6 +444,13 @@ static void ag71xx_int_disable(struct ag71xx *ag, u32 ints) ag71xx_cb(ag, AG71XX_REG_INT_ENABLE, ints); } +static int ag71xx_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) +{ + struct ag71xx *ag = netdev_priv(ndev); + + return phylink_mii_ioctl(ag->phylink, ifr, cmd); +} + static void ag71xx_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info) { @@ -504,8 +508,7 @@ static void ag71xx_ethtool_get_strings(struct net_device *netdev, u32 sset, switch (sset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(ag71xx_statistics); i++) - memcpy(data + i * ETH_GSTRING_LEN, - ag71xx_statistics[i].name, ETH_GSTRING_LEN); + ethtool_puts(&data, ag71xx_statistics[i].name); break; case ETH_SS_TEST: net_selftest_get_strings(data); @@ -685,36 +688,27 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) { struct device *dev = &ag->pdev->dev; struct net_device *ndev = ag->ndev; + struct reset_control *mdio_reset; static struct mii_bus *mii_bus; struct device_node *np, *mnp; int err; np = dev->of_node; - ag->mii_bus = NULL; - ag->clk_mdio = devm_clk_get(dev, "mdio"); + ag->clk_mdio = devm_clk_get_enabled(dev, "mdio"); if (IS_ERR(ag->clk_mdio)) { netif_err(ag, probe, ndev, "Failed to get mdio clk.\n"); return PTR_ERR(ag->clk_mdio); } - err = clk_prepare_enable(ag->clk_mdio); - if (err) { - netif_err(ag, probe, ndev, "Failed to enable mdio clk.\n"); - return err; - } - mii_bus = devm_mdiobus_alloc(dev); - if (!mii_bus) { - err = -ENOMEM; - goto mdio_err_put_clk; - } + if (!mii_bus) + return -ENOMEM; - ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio"); - if (IS_ERR(ag->mdio_reset)) { + mdio_reset = devm_reset_control_get_exclusive(dev, "mdio"); + if (IS_ERR(mdio_reset)) { netif_err(ag, probe, ndev, "Failed to get reset mdio.\n"); - err = PTR_ERR(ag->mdio_reset); - goto mdio_err_put_clk; + return PTR_ERR(mdio_reset); } mii_bus->name = "ag71xx_mdio"; @@ -725,33 +719,18 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) mii_bus->parent = dev; snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s.%d", np->name, ag->mac_idx); - if (!IS_ERR(ag->mdio_reset)) { - reset_control_assert(ag->mdio_reset); - msleep(100); - reset_control_deassert(ag->mdio_reset); - msleep(200); - } + reset_control_assert(mdio_reset); + msleep(100); + reset_control_deassert(mdio_reset); + msleep(200); mnp = of_get_child_by_name(np, "mdio"); - err = of_mdiobus_register(mii_bus, mnp); + err = devm_of_mdiobus_register(dev, mii_bus, mnp); of_node_put(mnp); if (err) - goto mdio_err_put_clk; - - ag->mii_bus = mii_bus; + return err; return 0; - -mdio_err_put_clk: - clk_disable_unprepare(ag->clk_mdio); - return err; -} - -static void ag71xx_mdio_remove(struct ag71xx *ag) -{ - if (ag->mii_bus) - mdiobus_unregister(ag->mii_bus); - clk_disable_unprepare(ag->clk_mdio); } static void ag71xx_hw_stop(struct ag71xx *ag) @@ -1637,7 +1616,6 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit) unsigned int i = ring->curr & ring_mask; struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); int pktlen; - int err = 0; if (ag71xx_desc_empty(desc)) break; @@ -1660,6 +1638,7 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit) skb = napi_build_skb(ring->buf[i].rx.rx_buf, ag71xx_buffer_size(ag)); if (!skb) { + ndev->stats.rx_errors++; skb_free_frag(ring->buf[i].rx.rx_buf); goto next; } @@ -1667,14 
+1646,9 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit) skb_reserve(skb, offset); skb_put(skb, pktlen); - if (err) { - ndev->stats.rx_dropped++; - kfree_skb(skb); - } else { - skb->dev = ndev; - skb->ip_summed = CHECKSUM_NONE; - list_add_tail(&skb->list, &rx_list); - } + skb->dev = ndev; + skb->ip_summed = CHECKSUM_NONE; + list_add_tail(&skb->list, &rx_list); next: ring->buf[i].rx.rx_buf = NULL; @@ -1799,7 +1773,7 @@ static const struct net_device_ops ag71xx_netdev_ops = { .ndo_open = ag71xx_open, .ndo_stop = ag71xx_stop, .ndo_start_xmit = ag71xx_hard_start_xmit, - .ndo_eth_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = ag71xx_do_ioctl, .ndo_tx_timeout = ag71xx_tx_timeout, .ndo_change_mtu = ag71xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, @@ -1816,6 +1790,7 @@ static int ag71xx_probe(struct platform_device *pdev) const struct ag71xx_dcfg *dcfg; struct net_device *ndev; struct resource *res; + struct clk *clk_eth; int tx_size, err, i; struct ag71xx *ag; @@ -1846,10 +1821,10 @@ static int ag71xx_probe(struct platform_device *pdev) return -EINVAL; } - ag->clk_eth = devm_clk_get(&pdev->dev, "eth"); - if (IS_ERR(ag->clk_eth)) { + clk_eth = devm_clk_get_enabled(&pdev->dev, "eth"); + if (IS_ERR(clk_eth)) { netif_err(ag, probe, ndev, "Failed to get eth clk.\n"); - return PTR_ERR(ag->clk_eth); + return PTR_ERR(clk_eth); } SET_NETDEV_DEV(ndev, &pdev->dev); @@ -1870,6 +1845,12 @@ static int ag71xx_probe(struct platform_device *pdev) if (!ag->mac_base) return -ENOMEM; + /* ensure that HW is in manual polling mode before interrupts are + * activated. Otherwise ag71xx_interrupt might call napi_schedule + * before it is initialized by netif_napi_add. + */ + ag71xx_int_disable(ag, AG71XX_INT_POLL); + ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, 0x0, dev_name(&pdev->dev), ndev); @@ -1912,6 +1893,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc->next = (u32)ag->stop_desc_dma; err = of_get_ethdev_address(np, ndev); + if (err == -EPROBE_DEFER) + return err; if (err) { netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); eth_hw_addr_random(ndev); @@ -1926,33 +1909,27 @@ static int ag71xx_probe(struct platform_device *pdev) netif_napi_add_weight(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); - err = clk_prepare_enable(ag->clk_eth); - if (err) { - netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); - return err; - } - ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); ag71xx_hw_init(ag); err = ag71xx_mdio_probe(ag); if (err) - goto err_put_clk; + return err; platform_set_drvdata(pdev, ndev); err = ag71xx_phylink_setup(ag); if (err) { netif_err(ag, probe, ndev, "failed to setup phylink (%d)\n", err); - goto err_mdio_remove; + return err; } - err = register_netdev(ndev); + err = devm_register_netdev(&pdev->dev, ndev); if (err) { netif_err(ag, probe, ndev, "unable to register net device\n"); platform_set_drvdata(pdev, NULL); - goto err_mdio_remove; + return err; } netif_info(ag, probe, ndev, "Atheros AG71xx at 0x%08lx, irq %d, mode:%s\n", @@ -1960,27 +1937,6 @@ static int ag71xx_probe(struct platform_device *pdev) phy_modes(ag->phy_if_mode)); return 0; - -err_mdio_remove: - ag71xx_mdio_remove(ag); -err_put_clk: - clk_disable_unprepare(ag->clk_eth); - return err; -} - -static void ag71xx_remove(struct platform_device *pdev) -{ - struct net_device *ndev = platform_get_drvdata(pdev); - struct ag71xx *ag; - - if (!ndev) - return; - - ag = netdev_priv(ndev); - unregister_netdev(ndev); - 
ag71xx_mdio_remove(ag); - clk_disable_unprepare(ag->clk_eth); - platform_set_drvdata(pdev, NULL); } static const u32 ar71xx_fifo_ar7100[] = { @@ -2064,10 +2020,10 @@ static const struct of_device_id ag71xx_match[] = { { .compatible = "qca,qca9560-eth", .data = &ag71xx_dcfg_qca9550 }, {} }; +MODULE_DEVICE_TABLE(of, ag71xx_match); static struct platform_driver ag71xx_driver = { .probe = ag71xx_probe, - .remove_new = ag71xx_remove, .driver = { .name = "ag71xx", .of_match_table = ag71xx_match, @@ -2075,4 +2031,5 @@ static struct platform_driver ag71xx_driver = { }; module_platform_driver(ag71xx_driver); +MODULE_DESCRIPTION("Atheros AR71xx built-in ethernet mac driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index 20c6529ec135..297c2682a9cf 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1300,9 +1300,9 @@ static void bcmasp_remove_intfs(struct bcmasp_priv *priv) static int bcmasp_probe(struct platform_device *pdev) { - struct device_node *ports_node, *intf_node; const struct bcmasp_plat_data *pdata; struct device *dev = &pdev->dev; + struct device_node *ports_node; struct bcmasp_priv *priv; struct bcmasp_intf *intf; int ret = 0, count = 0; @@ -1374,12 +1374,11 @@ static int bcmasp_probe(struct platform_device *pdev) } i = 0; - for_each_available_child_of_node(ports_node, intf_node) { + for_each_available_child_of_node_scoped(ports_node, intf_node) { intf = bcmasp_interface_create(priv, intf_node, i); if (!intf) { dev_err(dev, "Cannot create eth interface %d\n", i); bcmasp_remove_intfs(priv); - of_node_put(intf_node); ret = -ENOMEM; goto of_put_exit; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index c7b56a5e5425..adf7b6b94941 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3640,16 +3640,12 @@ static int bnx2x_get_ts_info(struct net_device *dev, if (bp->flags & PTP_SUPPORTED) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (bp->ptp_clock) info->phc_index = ptp_clock_index(bp->ptp_clock); - else - info->phc_index = -1; info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index 4e9215bce4ad..a018f251d198 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -868,6 +868,8 @@ #define DORQ_REG_VF_TYPE_VALUE_0 0x170258 #define DORQ_REG_VF_USAGE_CT_LIMIT 0x170340 +extern const u32 dmae_reg_go_c[]; + /* [RW 4] Initial activity counter value on the load request; when the shortcut is done. 
*/ #define DORQ_REG_SHRT_ACT_CNT 0x170070 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 77d4cb4ad782..12198fc3ab22 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2652,10 +2652,10 @@ int bnx2x_get_vf_config(struct net_device *dev, int vfidx, /* vlan */ if (bulletin->valid_bitmap & (1 << VLAN_VALID)) /* vlan configured by ndo so its in bulletin board */ - memcpy(&ivi->vlan, &bulletin->vlan, VLAN_HLEN); + ivi->vlan = bulletin->vlan; else /* function has not been loaded yet. Show vlans as 0s */ - memset(&ivi->vlan, 0, VLAN_HLEN); + ivi->vlan = 0; mutex_unlock(&bp->vfdb->bulletin_mutex); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index 2bb133ae61c3..ba6729f2f9c0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -23,8 +23,6 @@ #include "bnx2x_cmn.h" #include "bnx2x_sriov.h" -extern const u32 dmae_reg_go_c[]; - /* Statistics */ /* diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ffa74c26ee53..6e422e24750a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -69,6 +69,7 @@ #include "bnxt_tc.h" #include "bnxt_devlink.h" #include "bnxt_debugfs.h" +#include "bnxt_coredump.h" #include "bnxt_hwmon.h" #define BNXT_TX_TIMEOUT (5 * HZ) @@ -301,10 +302,6 @@ static bool bnxt_vf_pciid(enum board_idx idx) #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) #define DB_CP_FLAGS (DB_KEY_CP | DB_IDX_VALID | DB_IRQ_DIS) -#define DB_CP_IRQ_DIS_FLAGS (DB_KEY_CP | DB_IRQ_DIS) - -#define BNXT_CP_DB_IRQ_DIS(db) \ - writel(DB_CP_IRQ_DIS_FLAGS, db) #define BNXT_DB_CQ(db, idx) \ writel(DB_CP_FLAGS | DB_RING_IDX(db, idx), (db)->doorbell) @@ -2853,34 +2850,6 @@ static inline int bnxt_has_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) return TX_CMP_VALID(txcmp, raw_cons); } -static irqreturn_t bnxt_inta(int irq, void *dev_instance) -{ - struct bnxt_napi *bnapi = dev_instance; - struct bnxt *bp = bnapi->bp; - struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; - u32 cons = RING_CMP(cpr->cp_raw_cons); - u32 int_status; - - prefetch(&cpr->cp_desc_ring[CP_RING(cons)][CP_IDX(cons)]); - - if (!bnxt_has_work(bp, cpr)) { - int_status = readl(bp->bar0 + BNXT_CAG_REG_LEGACY_INT_STATUS); - /* return if erroneous interrupt */ - if (!(int_status & (0x10000 << cpr->cp_ring_struct.fw_ring_id))) - return IRQ_NONE; - } - - /* disable ring IRQ */ - BNXT_CP_DB_IRQ_DIS(cpr->cp_db.doorbell); - - /* Return here if interrupt is shared and is disabled. 
*/ - if (unlikely(atomic_read(&bp->intr_sem) != 0)) - return IRQ_HANDLED; - - napi_schedule(&bnapi->napi); - return IRQ_HANDLED; -} - static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, int budget) { @@ -5056,7 +5025,7 @@ void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr) list_del_init(&fltr->list); } -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) +static void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) { struct bnxt_filter_base *usr_fltr, *tmp; @@ -6579,7 +6548,8 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic) req->dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id); req->lb_rule = cpu_to_le16(0xffff); vnic_mru: - req->mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + VLAN_HLEN); + vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; + req->mru = cpu_to_le16(vnic->mru); req->vnic_id = cpu_to_le16(vnic->fw_vnic_id); #ifdef CONFIG_BNXT_SRIOV @@ -6715,6 +6685,8 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) bp->rss_cap |= BNXT_RSS_CAP_ESP_V4_RSS_CAP; if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP) bp->rss_cap |= BNXT_RSS_CAP_ESP_V6_RSS_CAP; + if (flags & VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP) + bp->fw_cap |= BNXT_FW_CAP_VNIC_RE_FLUSH; } hwrm_req_drop(bp, req); return rc; @@ -6872,15 +6844,14 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, req->cq_handle = cpu_to_le64(ring->handle); req->enables |= cpu_to_le32( RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID); - } else if (bp->flags & BNXT_FLAG_USING_MSIX) { + } else { req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX; } break; case HWRM_RING_ALLOC_NQ: req->ring_type = RING_ALLOC_REQ_RING_TYPE_NQ; req->length = cpu_to_le32(bp->cp_ring_mask + 1); - if (bp->flags & BNXT_FLAG_USING_MSIX) - req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX; + req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX; break; default: netdev_err(bp->dev, "hwrm alloc invalid ring type %d\n", @@ -7591,19 +7562,20 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) int rx = bp->rx_nr_rings, stat; int vnic, grp = rx; - if (hw_resc->resv_tx_rings != bp->tx_nr_rings && - bp->hwrm_spec_code >= 0x10601) - return true; - /* Old firmware does not need RX ring reservations but we still * need to setup a default RSS map when needed. With new firmware * we go through RX ring reservations first and then set up the * RSS map for the successfully reserved RX rings when needed. 
*/ - if (!BNXT_NEW_RM(bp)) { + if (!BNXT_NEW_RM(bp)) bnxt_check_rss_tbl_no_rmgr(bp); + + if (hw_resc->resv_tx_rings != bp->tx_nr_rings && + bp->hwrm_spec_code >= 0x10601) + return true; + + if (!BNXT_NEW_RM(bp)) return false; - } vnic = bnxt_get_total_vnics(bp, rx); @@ -7649,8 +7621,8 @@ static int bnxt_get_avail_msix(struct bnxt *bp, int num); static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; + int rx_rings, old_rx_rings, rc; int cp = bp->cp_nr_rings; - int rx_rings, rc; int ulp_msix = 0; bool sh = false; int tx_cp; @@ -7684,6 +7656,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.grp = bp->rx_nr_rings; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); hwr.stat = bnxt_get_func_stat_ctxs(bp); + old_rx_rings = bp->hw_resc.resv_rx_rings; rc = bnxt_hwrm_reserve_rings(bp, &hwr); if (rc) @@ -7738,7 +7711,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (!bnxt_rings_ok(bp, &hwr)) return -ENOMEM; - if (!netif_is_rxfh_configured(bp->dev)) + if (old_rx_rings != bp->hw_resc.resv_rx_rings && + !netif_is_rxfh_configured(bp->dev)) bnxt_set_dflt_rss_indir_tbl(bp, NULL); if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { @@ -8940,6 +8914,80 @@ skip_rdma: return 0; } +static int bnxt_hwrm_crash_dump_mem_cfg(struct bnxt *bp) +{ + struct hwrm_dbg_crashdump_medium_cfg_input *req; + u16 page_attr; + int rc; + + if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR)) + return 0; + + rc = hwrm_req_init(bp, req, HWRM_DBG_CRASHDUMP_MEDIUM_CFG); + if (rc) + return rc; + + if (BNXT_PAGE_SIZE == 0x2000) + page_attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K; + else if (BNXT_PAGE_SIZE == 0x10000) + page_attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K; + else + page_attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K; + req->pg_size_lvl = cpu_to_le16(page_attr | + bp->fw_crash_mem->ring_mem.depth); + req->pbl = cpu_to_le64(bp->fw_crash_mem->ring_mem.pg_tbl_map); + req->size = cpu_to_le32(bp->fw_crash_len); + req->output_dest_flags = cpu_to_le16(BNXT_DBG_CR_DUMP_MDM_CFG_DDR); + return hwrm_req_send(bp, req); +} + +static void bnxt_free_crash_dump_mem(struct bnxt *bp) +{ + if (bp->fw_crash_mem) { + bnxt_free_ctx_pg_tbls(bp, bp->fw_crash_mem); + kfree(bp->fw_crash_mem); + bp->fw_crash_mem = NULL; + } +} + +static int bnxt_alloc_crash_dump_mem(struct bnxt *bp) +{ + u32 mem_size = 0; + int rc; + + if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR)) + return 0; + + rc = bnxt_hwrm_get_dump_len(bp, BNXT_DUMP_CRASH, &mem_size); + if (rc) + return rc; + + mem_size = round_up(mem_size, 4); + + /* keep and use the existing pages */ + if (bp->fw_crash_mem && + mem_size <= bp->fw_crash_mem->nr_pages * BNXT_PAGE_SIZE) + goto alloc_done; + + if (bp->fw_crash_mem) + bnxt_free_ctx_pg_tbls(bp, bp->fw_crash_mem); + else + bp->fw_crash_mem = kzalloc(sizeof(*bp->fw_crash_mem), + GFP_KERNEL); + if (!bp->fw_crash_mem) + return -ENOMEM; + + rc = bnxt_alloc_ctx_pg_tbls(bp, bp->fw_crash_mem, mem_size, 1, NULL); + if (rc) { + bnxt_free_crash_dump_mem(bp); + return rc; + } + +alloc_done: + bp->fw_crash_len = mem_size; + return 0; +} + int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all) { struct hwrm_func_resource_qcaps_output *resp; @@ -9115,6 +9163,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH; + if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_DFLT_VLAN_TPID_PCP_SUPPORTED)) + bp->fw_cap 
|= BNXT_FW_CAP_DFLT_VLAN_TPID_PCP; if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED) bp->fw_cap |= BNXT_FW_CAP_BACKING_STORE_V2; if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP) @@ -10086,6 +10136,26 @@ vnic_setup_err: return rc; } +int bnxt_hwrm_vnic_update(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u8 valid) +{ + struct hwrm_vnic_update_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_VNIC_UPDATE); + if (rc) + return rc; + + req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); + + if (valid & VNIC_UPDATE_REQ_ENABLES_MRU_VALID) + req->mru = cpu_to_le16(vnic->mru); + + req->enables = cpu_to_le32(valid); + + return hwrm_req_send(bp, req); +} + int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) { int rc; @@ -10245,7 +10315,7 @@ static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) } } -void bnxt_clear_rss_ctxs(struct bnxt *bp) +static void bnxt_clear_rss_ctxs(struct bnxt *bp) { struct ethtool_rxfh_context *ctx; unsigned long context; @@ -10550,22 +10620,32 @@ static void bnxt_setup_msix(struct bnxt *bp) } } -static void bnxt_setup_inta(struct bnxt *bp) +static int bnxt_init_int_mode(struct bnxt *bp); + +static int bnxt_change_msix(struct bnxt *bp, int total) { - const int len = sizeof(bp->irq_tbl[0].name); + struct msi_map map; + int i; - if (bp->num_tc) { - netdev_reset_tc(bp->dev); - bp->num_tc = 0; + /* add MSIX to the end if needed */ + for (i = bp->total_irqs; i < total; i++) { + map = pci_msix_alloc_irq_at(bp->pdev, i, NULL); + if (map.index < 0) + return bp->total_irqs; + bp->irq_tbl[i].vector = map.virq; + bp->total_irqs++; } - snprintf(bp->irq_tbl[0].name, len, "%s-%s-%d", bp->dev->name, "TxRx", - 0); - bp->irq_tbl[0].handler = bnxt_inta; + /* trim MSIX from the end if needed */ + for (i = bp->total_irqs; i > total; i--) { + map.index = i - 1; + map.virq = bp->irq_tbl[i - 1].vector; + pci_msix_free_irq(bp->pdev, map); + bp->total_irqs--; + } + return bp->total_irqs; } -static int bnxt_init_int_mode(struct bnxt *bp); - static int bnxt_setup_int_mode(struct bnxt *bp) { int rc; @@ -10576,10 +10656,7 @@ static int bnxt_setup_int_mode(struct bnxt *bp) return rc ?: -ENODEV; } - if (bp->flags & BNXT_FLAG_USING_MSIX) - bnxt_setup_msix(bp); - else - bnxt_setup_inta(bp); + bnxt_setup_msix(bp); rc = bnxt_set_real_num_queues(bp); return rc; @@ -10667,10 +10744,9 @@ static int bnxt_get_num_msix(struct bnxt *bp) return bnxt_nq_rings_in_use(bp); } -static int bnxt_init_msix(struct bnxt *bp) +static int bnxt_init_int_mode(struct bnxt *bp) { - int i, total_vecs, max, rc = 0, min = 1, ulp_msix, tx_cp; - struct msix_entry *msix_ent; + int i, total_vecs, max, rc = 0, min = 1, ulp_msix, tx_cp, tbl_size; total_vecs = bnxt_get_num_msix(bp); max = bnxt_get_max_func_irqs(bp); @@ -10680,29 +10756,24 @@ static int bnxt_init_msix(struct bnxt *bp) if (!total_vecs) return 0; - msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL); - if (!msix_ent) - return -ENOMEM; - - for (i = 0; i < total_vecs; i++) { - msix_ent[i].entry = i; - msix_ent[i].vector = 0; - } - if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) min = 2; - total_vecs = pci_enable_msix_range(bp->pdev, msix_ent, min, total_vecs); + total_vecs = pci_alloc_irq_vectors(bp->pdev, min, total_vecs, + PCI_IRQ_MSIX); ulp_msix = bnxt_get_ulp_msix_num(bp); if (total_vecs < 0 || total_vecs < ulp_msix) { rc = -ENODEV; goto msix_setup_exit; } - bp->irq_tbl = kcalloc(total_vecs, sizeof(struct bnxt_irq), GFP_KERNEL); + tbl_size = total_vecs; + if (pci_msix_can_alloc_dyn(bp->pdev)) + tbl_size = max; 
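/* A minimal sketch of the dynamic MSI-X pattern used by bnxt_change_msix()
 * above: when pci_msix_can_alloc_dyn() reports support, the IRQ table is
 * sized for the maximum up front so individual vectors can later be added
 * with pci_msix_alloc_irq_at() or released with pci_msix_free_irq() without
 * tearing down the whole MSI-X setup. The "foo" names are hypothetical;
 * only the PCI helpers are real kernel API.
 */
#include <linux/pci.h>

struct foo_irq {
	unsigned int vector;
};

/* Grow or trim the active vector count; returns the count now usable. */
static int foo_change_msix(struct pci_dev *pdev, struct foo_irq *tbl,
			   int cur, int want)
{
	struct msi_map map;
	int i;

	for (i = cur; i < want; i++) {	/* add vectors at the end */
		map = pci_msix_alloc_irq_at(pdev, i, NULL);
		if (map.index < 0)
			return i;	/* partial growth is still usable */
		tbl[i].vector = map.virq;
	}
	for (i = cur; i > want; i--) {	/* trim vectors from the end */
		map.index = i - 1;
		map.virq = tbl[i - 1].vector;
		pci_msix_free_irq(pdev, map);
	}
	return want;
}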
+ bp->irq_tbl = kcalloc(tbl_size, sizeof(*bp->irq_tbl), GFP_KERNEL); if (bp->irq_tbl) { for (i = 0; i < total_vecs; i++) - bp->irq_tbl[i].vector = msix_ent[i].vector; + bp->irq_tbl[i].vector = pci_irq_vector(bp->pdev, i); bp->total_irqs = total_vecs; /* Trim rings based upon num of vectors allocated */ @@ -10720,61 +10791,28 @@ static int bnxt_init_msix(struct bnxt *bp) rc = -ENOMEM; goto msix_setup_exit; } - bp->flags |= BNXT_FLAG_USING_MSIX; - kfree(msix_ent); return 0; msix_setup_exit: - netdev_err(bp->dev, "bnxt_init_msix err: %x\n", rc); + netdev_err(bp->dev, "bnxt_init_int_mode err: %x\n", rc); kfree(bp->irq_tbl); bp->irq_tbl = NULL; - pci_disable_msix(bp->pdev); - kfree(msix_ent); - return rc; -} - -static int bnxt_init_inta(struct bnxt *bp) -{ - bp->irq_tbl = kzalloc(sizeof(struct bnxt_irq), GFP_KERNEL); - if (!bp->irq_tbl) - return -ENOMEM; - - bp->total_irqs = 1; - bp->rx_nr_rings = 1; - bp->tx_nr_rings = 1; - bp->cp_nr_rings = 1; - bp->flags |= BNXT_FLAG_SHARED_RINGS; - bp->irq_tbl[0].vector = bp->pdev->irq; - return 0; -} - -static int bnxt_init_int_mode(struct bnxt *bp) -{ - int rc = -ENODEV; - - if (bp->flags & BNXT_FLAG_MSIX_CAP) - rc = bnxt_init_msix(bp); - - if (!(bp->flags & BNXT_FLAG_USING_MSIX) && BNXT_PF(bp)) { - /* fallback to INTA */ - rc = bnxt_init_inta(bp); - } + pci_free_irq_vectors(bp->pdev); return rc; } static void bnxt_clear_int_mode(struct bnxt *bp) { - if (bp->flags & BNXT_FLAG_USING_MSIX) - pci_disable_msix(bp->pdev); + pci_free_irq_vectors(bp->pdev); kfree(bp->irq_tbl); bp->irq_tbl = NULL; - bp->flags &= ~BNXT_FLAG_USING_MSIX; } int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) { bool irq_cleared = false; + bool irq_change = false; int tcs = bp->num_tc; int irqs_required; int rc; @@ -10793,15 +10831,21 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) } if (irq_re_init && BNXT_NEW_RM(bp) && irqs_required != bp->total_irqs) { - bnxt_ulp_irq_stop(bp); - bnxt_clear_int_mode(bp); - irq_cleared = true; + irq_change = true; + if (!pci_msix_can_alloc_dyn(bp->pdev)) { + bnxt_ulp_irq_stop(bp); + bnxt_clear_int_mode(bp); + irq_cleared = true; + } } rc = __bnxt_reserve_rings(bp); if (irq_cleared) { if (!rc) rc = bnxt_init_int_mode(bp); bnxt_ulp_irq_restart(bp, rc); + } else if (irq_change && !rc) { + if (bnxt_change_msix(bp, irqs_required) != irqs_required) + rc = -ENOSPC; } if (rc) { netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc); @@ -10867,9 +10911,6 @@ static int bnxt_request_irq(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL rmap = bp->dev->rx_cpu_rmap; #endif - if (!(bp->flags & BNXT_FLAG_USING_MSIX)) - flags = IRQF_SHARED; - for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { int map_idx = bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; @@ -10934,29 +10975,22 @@ static void bnxt_del_napi(struct bnxt *bp) static void bnxt_init_napi(struct bnxt *bp) { - int i; + int (*poll_fn)(struct napi_struct *, int) = bnxt_poll; unsigned int cp_nr_rings = bp->cp_nr_rings; struct bnxt_napi *bnapi; + int i; - if (bp->flags & BNXT_FLAG_USING_MSIX) { - int (*poll_fn)(struct napi_struct *, int) = bnxt_poll; - - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) - poll_fn = bnxt_poll_p5; - else if (BNXT_CHIP_TYPE_NITRO_A0(bp)) - cp_nr_rings--; - for (i = 0; i < cp_nr_rings; i++) { - bnapi = bp->bnapi[i]; - netif_napi_add(bp->dev, &bnapi->napi, poll_fn); - } - if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { - bnapi = bp->bnapi[cp_nr_rings]; - netif_napi_add(bp->dev, &bnapi->napi, - bnxt_poll_nitroa0); - } - } else { - bnapi = bp->bnapi[0]; - 
netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + poll_fn = bnxt_poll_p5; + else if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + cp_nr_rings--; + for (i = 0; i < cp_nr_rings; i++) { + bnapi = bp->bnapi[i]; + netif_napi_add(bp->dev, &bnapi->napi, poll_fn); + } + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { + bnapi = bp->bnapi[cp_nr_rings]; + netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll_nitroa0); } } @@ -11944,20 +11978,6 @@ static int bnxt_update_phy_setting(struct bnxt *bp) return rc; } -/* Common routine to pre-map certain register block to different GRC window. - * A PF has 16 4K windows and a VF has 4 4K windows. However, only 15 windows - * in PF and 3 windows in VF that can be customized to map in different - * register blocks. - */ -static void bnxt_preset_reg_win(struct bnxt *bp) -{ - if (BNXT_PF(bp)) { - /* CAG registers map to GRC window #4 */ - writel(BNXT_CAG_REG_BASE, - bp->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 12); - } -} - static int bnxt_init_dflt_ring_mode(struct bnxt *bp); static int bnxt_reinit_after_abort(struct bnxt *bp) @@ -12062,7 +12082,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; - bnxt_preset_reg_win(bp); netif_carrier_off(bp->dev); if (irq_re_init) { /* Reserve rings now if none were reserved at driver probe. */ @@ -12075,12 +12094,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) rc = bnxt_reserve_rings(bp, irq_re_init); if (rc) return rc; - if ((bp->flags & BNXT_FLAG_RFS) && - !(bp->flags & BNXT_FLAG_USING_MSIX)) { - /* disable RFS if falling back to INTA */ - bp->dev->hw_features &= ~NETIF_F_NTUPLE; - bp->flags &= ~BNXT_FLAG_RFS; - } rc = bnxt_alloc_mem(bp, irq_re_init); if (rc) { @@ -12807,7 +12820,7 @@ bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx) !BNXT_SUPPORTS_NTUPLE_VNIC(bp)) return bnxt_rfs_supported(bp); - if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) + if (!bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; hwr.grp = bp->rx_nr_rings; @@ -13790,6 +13803,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int max_rx, max_tx, max_cp, tx_sets = 1, tx_cp; struct bnxt_hw_rings hwr = {0}; int rx_rings = rx; + int rc; if (tcs) tx_sets = tcs; @@ -13822,7 +13836,23 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, } if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) hwr.cp_p5 = hwr.tx + rx; - return bnxt_hwrm_check_rings(bp, &hwr); + rc = bnxt_hwrm_check_rings(bp, &hwr); + if (!rc && pci_msix_can_alloc_dyn(bp->pdev)) { + if (!bnxt_ulp_registered(bp->edev)) { + hwr.cp += bnxt_get_ulp_msix_num(bp); + hwr.cp = min_t(int, hwr.cp, bnxt_get_max_func_irqs(bp)); + } + if (hwr.cp > bp->total_irqs) { + int total_msix = bnxt_change_msix(bp, hwr.cp); + + if (total_msix < hwr.cp) { + netdev_warn(bp->dev, "Unable to allocate %d MSIX vectors, maximum available %d\n", + hwr.cp, total_msix); + rc = -ENOSPC; + } + } + } + return rc; } static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev) @@ -13960,6 +13990,19 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp) if (rc) return -ENODEV; + rc = bnxt_alloc_crash_dump_mem(bp); + if (rc) + netdev_warn(bp->dev, "crash dump mem alloc failure rc: %d\n", + rc); + if (!rc) { + rc = bnxt_hwrm_crash_dump_mem_cfg(bp); + if (rc) { + bnxt_free_crash_dump_mem(bp); + netdev_warn(bp->dev, + "hwrm crash dump mem failure rc: %d\n", rc); + } + } + if (bnxt_fw_pre_resv_vnics(bp)) bp->fw_cap |= BNXT_FW_CAP_PRE_RESV_VNICS; @@ 
-15151,7 +15194,8 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) struct bnxt *bp = netdev_priv(dev); struct bnxt_rx_ring_info *rxr, *clone; struct bnxt_cp_ring_info *cpr; - int rc; + struct bnxt_vnic_info *vnic; + int i, rc; rxr = &bp->rx_ring[idx]; clone = qmem; @@ -15176,11 +15220,16 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) if (bp->flags & BNXT_FLAG_AGG_RINGS) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); - napi_enable(&rxr->bnapi->napi); - cpr = &rxr->bnapi->cp_ring; cpr->sw_stats->rx.rx_resets++; + for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + vnic = &bp->vnic_info[i]; + vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; + bnxt_hwrm_vnic_update(bp, vnic, + VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + } + return 0; err_free_hwrm_rx_ring: @@ -15192,9 +15241,17 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) { struct bnxt *bp = netdev_priv(dev); struct bnxt_rx_ring_info *rxr; + struct bnxt_vnic_info *vnic; + int i; + + for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + vnic = &bp->vnic_info[i]; + vnic->mru = 0; + bnxt_hwrm_vnic_update(bp, vnic, + VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + } rxr = &bp->rx_ring[idx]; - napi_disable(&rxr->bnapi->napi); bnxt_hwrm_rx_ring_free(bp, rxr, false); bnxt_hwrm_rx_agg_ring_free(bp, rxr, false); rxr->rx_next_cons = 0; @@ -15254,6 +15311,7 @@ static void bnxt_remove_one(struct pci_dev *pdev) bp->fw_health = NULL; bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); + bnxt_free_crash_dump_mem(bp); kfree(bp->rss_indir_tbl); bp->rss_indir_tbl = NULL; bnxt_free_port_stats(bp); @@ -15641,6 +15699,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_is_bridge(pdev)) return -ENODEV; + if (!pdev->msix_cap) { + dev_err(&pdev->dev, "MSIX capability not found, aborting\n"); + return -ENODEV; + } + /* Clear any pending DMA transactions from crash kernel * while loading driver in capture kernel. 
*/ @@ -15667,9 +15730,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (BNXT_PF(bp)) SET_NETDEV_DEVLINK_PORT(dev, &bp->dl_port); - if (pdev->msix_cap) - bp->flags |= BNXT_FLAG_MSIX_CAP; - rc = bnxt_init_board(pdev, dev); if (rc < 0) goto init_err_free; @@ -15678,7 +15738,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->stat_ops = &bnxt_stat_ops; dev->watchdog_timeo = BNXT_TX_TIMEOUT; dev->ethtool_ops = &bnxt_ethtool_ops; - dev->queue_mgmt_ops = &bnxt_queue_mgmt_ops; pci_set_drvdata(pdev, dev); rc = bnxt_alloc_hwrm_resources(bp); @@ -15859,6 +15918,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) bp->rss_cap |= BNXT_RSS_CAP_MULTI_RSS_CTX; + if (BNXT_SUPPORTS_QUEUE_API(bp)) + dev->queue_mgmt_ops = &bnxt_queue_mgmt_ops; rc = register_netdev(dev); if (rc) @@ -15892,6 +15953,7 @@ init_err_pci_clean: bp->fw_health = NULL; bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); + bnxt_free_crash_dump_mem(bp); kfree(bp->rss_indir_tbl); bp->rss_indir_tbl = NULL; @@ -15983,6 +16045,8 @@ static int bnxt_resume(struct device *device) rc = -ENODEV; goto resume_exit; } + if (bp->fw_crash_mem) + bnxt_hwrm_crash_dump_mem_cfg(bp); bnxt_get_wol_settings(bp); if (netif_running(dev)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 6bbdc718c3a7..69231e85140b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1217,12 +1217,15 @@ struct bnxt_napi { bool in_reset; }; +/* "TxRx", 2 hyphens, plus maximum integer */ +#define BNXT_IRQ_NAME_EXTRA 17 + struct bnxt_irq { irq_handler_t handler; unsigned int vector; u8 requested:1; u8 have_cpumask:1; - char name[IFNAMSIZ + 2]; + char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA]; cpumask_var_t cpu_mask; }; @@ -1250,6 +1253,7 @@ struct bnxt_vnic_info { #define BNXT_MAX_CTX_PER_VNIC 8 u16 fw_rss_cos_lb_ctx[BNXT_MAX_CTX_PER_VNIC]; u16 fw_l2_ctx_id; + u16 mru; #define BNXT_MAX_UC_ADDRS 4 struct bnxt_l2_filter *l2_filters[BNXT_MAX_UC_ADDRS]; /* index 0 always dev_addr */ @@ -1355,7 +1359,6 @@ struct bnxt_vf_info { u16 vlan; u16 func_qcfg_flags; u32 flags; -#define BNXT_VF_QOS 0x1 #define BNXT_VF_SPOOFCHK 0x2 #define BNXT_VF_LINK_FORCED 0x4 #define BNXT_VF_LINK_UP 0x8 @@ -1755,8 +1758,6 @@ struct bnxt_test_info { #define BNXT_GRCPF_REG_CHIMP_COMM 0x0 #define BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER 0x100 #define BNXT_GRCPF_REG_WINDOW_BASE_OUT 0x400 -#define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 -#define BNXT_CAG_REG_BASE 0x300000 #define BNXT_GRC_REG_STATUS_P5 0x520 @@ -2199,8 +2200,6 @@ struct bnxt { #define BNXT_FLAG_STRIP_VLAN 0x20 #define BNXT_FLAG_AGG_RINGS (BNXT_FLAG_JUMBO | BNXT_FLAG_GRO | \ BNXT_FLAG_LRO) - #define BNXT_FLAG_USING_MSIX 0x40 - #define BNXT_FLAG_MSIX_CAP 0x80 #define BNXT_FLAG_RFS 0x100 #define BNXT_FLAG_SHARED_RINGS 0x200 #define BNXT_FLAG_PORT_STATS 0x400 @@ -2437,6 +2436,7 @@ struct bnxt { #define BNXT_FW_CAP_VNIC_TUNNEL_TPA BIT_ULL(37) #define BNXT_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO BIT_ULL(38) #define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3 BIT_ULL(39) + #define BNXT_FW_CAP_VNIC_RE_FLUSH BIT_ULL(40) u32 fw_dbg_cap; @@ -2449,6 +2449,9 @@ struct bnxt { #define BNXT_SUPPORTS_MULTI_RSS_CTX(bp) \ (BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \ ((bp)->rss_cap & BNXT_RSS_CAP_MULTI_RSS_CTX)) +#define BNXT_SUPPORTS_QUEUE_API(bp) \ + (BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \ + ((bp)->fw_cap & BNXT_FW_CAP_VNIC_RE_FLUSH))
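/* A minimal sketch of the capability gating introduced above, with
 * hypothetical "foo" names: the netdev queue management ops are only
 * published when probe saw the firmware advertise the corresponding
 * capability bit, so the API is never exposed on firmware that cannot
 * honor it.
 */
#include <linux/bits.h>
#include <linux/netdevice.h>
#include <net/netdev_queues.h>

#define FOO_FW_CAP_QUEUE_API	BIT_ULL(40)	/* hypothetical cap bit */

struct foo_priv {
	u64 fw_cap;		/* capability bits queried from firmware */
};

static void foo_publish_queue_api(struct net_device *dev,
				  const struct netdev_queue_mgmt_ops *ops)
{
	struct foo_priv *priv = netdev_priv(dev);

	/* leave queue_mgmt_ops NULL unless the capability was advertised */
	if (priv->fw_cap & FOO_FW_CAP_QUEUE_API)
		dev->queue_mgmt_ops = ops;
}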
u32 hwrm_spec_code; u16 hwrm_cmd_seq; @@ -2644,6 +2647,9 @@ struct bnxt { #endif u32 thermal_threshold_type; enum board_idx board_idx; + + struct bnxt_ctx_pg_info *fw_crash_mem; + u32 fw_crash_len; }; #define BNXT_NUM_RX_RING_STATS 8 @@ -2790,7 +2796,6 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, bool async_only); int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp); @@ -2838,11 +2843,12 @@ int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); +int bnxt_hwrm_vnic_update(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u8 valid); int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all); -void bnxt_clear_rss_ctxs(struct bnxt *bp); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index c06789882036..4e2b938ed1f7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -372,20 +372,81 @@ err: return rc; } +static u32 bnxt_copy_crash_data(struct bnxt_ring_mem_info *rmem, void *buf, + u32 dump_len) +{ + u32 data_copied = 0; + u32 data_len; + int i; + + for (i = 0; i < rmem->nr_pages; i++) { + data_len = rmem->page_size; + if (data_copied + data_len > dump_len) + data_len = dump_len - data_copied; + memcpy(buf + data_copied, rmem->pg_arr[i], data_len); + data_copied += data_len; + if (data_copied >= dump_len) + break; + } + return data_copied; +} + +static int bnxt_copy_crash_dump(struct bnxt *bp, void *buf, u32 dump_len) +{ + struct bnxt_ring_mem_info *rmem; + u32 offset = 0; + + if (!bp->fw_crash_mem) + return -ENOENT; + + rmem = &bp->fw_crash_mem->ring_mem; + + if (rmem->depth > 1) { + int i; + + for (i = 0; i < rmem->nr_pages; i++) { + struct bnxt_ctx_pg_info *pg_tbl; + + pg_tbl = bp->fw_crash_mem->ctx_pg_tbl[i]; + offset += bnxt_copy_crash_data(&pg_tbl->ring_mem, + buf + offset, + dump_len - offset); + if (offset >= dump_len) + break; + } + } else { + bnxt_copy_crash_data(rmem, buf, dump_len); + } + + return 0; +} + +static bool bnxt_crash_dump_avail(struct bnxt *bp) +{ + u32 sig = 0; + + /* First 4 bytes (signature) of the crash dump are always non-zero */ + bnxt_copy_crash_dump(bp, &sig, sizeof(sig)); + return !!sig; +} + int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len) { if (dump_type == BNXT_DUMP_CRASH) { + if (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR) + return bnxt_copy_crash_dump(bp, buf, *dump_len); #ifdef CONFIG_TEE_BNXT_FW - return tee_bnxt_copy_coredump(buf, 0, *dump_len); -#else - return -EOPNOTSUPP; + else if (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR) + return tee_bnxt_copy_coredump(buf, 0, *dump_len); #endif + else + return -EOPNOTSUPP; } else { return __bnxt_get_coredump(bp, buf, dump_len); } } -static int
bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len) +int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len) { struct hwrm_dbg_qcfg_output *resp; struct hwrm_dbg_qcfg_input *req; @@ -395,7 +456,8 @@ static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len) return -EOPNOTSUPP; if (dump_type == BNXT_DUMP_CRASH && - !(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR)) + !(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR || + (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR))) return -EOPNOTSUPP; rc = hwrm_req_init(bp, req, HWRM_DBG_QCFG); @@ -403,8 +465,12 @@ static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len) return rc; req->fid = cpu_to_le16(0xffff); - if (dump_type == BNXT_DUMP_CRASH) - req->flags = cpu_to_le16(DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR); + if (dump_type == BNXT_DUMP_CRASH) { + if (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR) + req->flags = cpu_to_le16(BNXT_DBG_FL_CR_DUMP_SIZE_SOC); + else + req->flags = cpu_to_le16(BNXT_DBG_FL_CR_DUMP_SIZE_HOST); + } resp = hwrm_req_hold(bp, req); rc = hwrm_req_send(bp, req); @@ -412,7 +478,10 @@ static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len) goto get_dump_len_exit; if (dump_type == BNXT_DUMP_CRASH) { - *dump_len = le32_to_cpu(resp->crashdump_size); + if (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR) + *dump_len = BNXT_CRASH_DUMP_LEN; + else + *dump_len = le32_to_cpu(resp->crashdump_size); } else { /* Driver adds coredump header and "HWRM_VER_GET response" * segment additionally to coredump. @@ -434,10 +503,17 @@ u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type) { u32 len = 0; + if (dump_type == BNXT_DUMP_CRASH && + bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR && + bp->fw_crash_mem) { + if (!bnxt_crash_dump_avail(bp)) + return 0; + + return bp->fw_crash_len; + } + if (bnxt_hwrm_get_dump_len(bp, dump_type, &len)) { - if (dump_type == BNXT_DUMP_CRASH) - len = BNXT_CRASH_DUMP_LEN; - else + if (dump_type != BNXT_DUMP_CRASH) __bnxt_get_coredump(bp, NULL, &len); } return len; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h index b1a1b2fffb19..a76d5c281413 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h @@ -111,7 +111,15 @@ struct hwrm_dbg_cmn_output { #define HWRM_DBG_CMN_FLAGS_MORE 1 }; +#define BNXT_DBG_FL_CR_DUMP_SIZE_SOC \ + DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR +#define BNXT_DBG_FL_CR_DUMP_SIZE_HOST \ + DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_HOST_DDR +#define BNXT_DBG_CR_DUMP_MDM_CFG_DDR \ + DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR + int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len); +int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len); u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type); #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c index 156c2404854f..127b7015f676 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c @@ -64,9 +64,9 @@ static const struct file_operations debugfs_dim_fops = { static void debugfs_dim_ring_init(struct dim *dim, int ring_idx, struct dentry *dd) { - static char qname[16]; + static char qname[12]; - snprintf(qname, 10, "%d", ring_idx); + 
snprintf(qname, sizeof(qname), "%d", ring_idx); debugfs_create_file(qname, 0600, dd, dim, &debugfs_dim_fops); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d00ef0063820..f71cc8188b4e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -955,11 +955,6 @@ static int bnxt_set_channels(struct net_device *dev, } tx_xdp = req_rx_rings; } - rc = bnxt_check_rings(bp, req_tx_rings, req_rx_rings, sh, tcs, tx_xdp); - if (rc) { - netdev_warn(dev, "Unable to allocate the requested rings\n"); - return rc; - } if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && @@ -968,9 +963,12 @@ static int bnxt_set_channels(struct net_device *dev, return -EINVAL; } - bnxt_clear_usr_fltrs(bp, true); - if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) - bnxt_clear_rss_ctxs(bp); + rc = bnxt_check_rings(bp, req_tx_rings, req_rx_rings, sh, tcs, tx_xdp); + if (rc) { + netdev_warn(dev, "Unable to allocate the requested rings\n"); + return rc; + } + if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -1863,8 +1861,14 @@ static void bnxt_modify_rss(struct bnxt *bp, struct ethtool_rxfh_context *ctx, } static int bnxt_rxfh_context_check(struct bnxt *bp, + const struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported"); + return -EOPNOTSUPP; + } + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) { NL_SET_ERR_MSG_MOD(extack, "RSS contexts not supported"); return -EOPNOTSUPP; @@ -1888,7 +1892,7 @@ static int bnxt_create_rxfh_context(struct net_device *dev, struct bnxt_vnic_info *vnic; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -1915,8 +1919,12 @@ static int bnxt_create_rxfh_context(struct net_device *dev, if (rc) goto out; + /* Populate defaults in the context */ bnxt_set_dflt_rss_indir_tbl(bp, ctx); + ctx->hfunc = ETH_RSS_HASH_TOP; memcpy(vnic->rss_hash_key, bp->rss_hash_key, HW_HASH_KEY_SIZE); + memcpy(ethtool_rxfh_context_key(ctx), + bp->rss_hash_key, HW_HASH_KEY_SIZE); rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings); if (rc) { @@ -1953,7 +1961,7 @@ static int bnxt_modify_rxfh_context(struct net_device *dev, struct bnxt_rss_ctx *rss_ctx; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -1990,7 +1998,6 @@ static int bnxt_set_rxfh(struct net_device *dev, bnxt_modify_rss(bp, NULL, NULL, rxfh); - bnxt_clear_usr_fltrs(bp, false); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); rc = bnxt_open_nic(bp, false, false); @@ -4151,7 +4158,7 @@ static void bnxt_get_pkgver(struct net_device *dev) if (!bnxt_get_pkginfo(dev, buf, sizeof(buf))) { len = strlen(bp->fw_ver_str); - snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1, + snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len, "/pkg %s", buf); } } @@ -4983,9 +4990,16 @@ static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump) return -EINVAL; } - if (!IS_ENABLED(CONFIG_TEE_BNXT_FW) && dump->flag == BNXT_DUMP_CRASH) { - netdev_info(dev, "Cannot collect crash dump as TEE_BNXT_FW config option is not enabled.\n"); - return -EOPNOTSUPP; + if (dump->flag == BNXT_DUMP_CRASH) { + if (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR && + 
(!IS_ENABLED(CONFIG_TEE_BNXT_FW))) { + netdev_info(dev, + "Cannot collect crash dump as TEE_BNXT_FW config option is not enabled.\n"); + return -EOPNOTSUPP; + } else if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR)) { + netdev_info(dev, "Crash dump collection from host memory is not supported on this interface.\n"); + return -EOPNOTSUPP; + } } bp->dump_flag = dump->flag; @@ -5030,11 +5044,8 @@ static int bnxt_get_ts_info(struct net_device *dev, struct bnxt_ptp_cfg *ptp; ptp = bp->ptp_cfg; - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; - info->phc_index = -1; if (!ptp) return 0; @@ -5279,8 +5290,8 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, - .cap_rss_ctx_supported = 1, - .rxfh_max_context_id = BNXT_MAX_ETH_RSS_CTX, + .rxfh_per_ctx_key = 1, + .rxfh_max_num_contexts = BNXT_MAX_ETH_RSS_CTX + 1, .rxfh_indir_space = BNXT_MAX_RSS_TABLE_ENTRIES_P5, .rxfh_priv_size = sizeof(struct bnxt_rss_ctx), .supported_coalesce_params = ETHTOOL_COALESCE_USECS | diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index f219709f9563..f8ef6f1a1964 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -403,6 +403,9 @@ struct cmd_nums { #define HWRM_FUNC_LAG_UPDATE 0x1b1UL #define HWRM_FUNC_LAG_FREE 0x1b2UL #define HWRM_FUNC_LAG_QCFG 0x1b3UL + #define HWRM_FUNC_TIMEDTX_PACING_RATE_ADD 0x1c2UL + #define HWRM_FUNC_TIMEDTX_PACING_RATE_DELETE 0x1c3UL + #define HWRM_FUNC_TIMEDTX_PACING_RATE_QUERY 0x1c4UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -430,6 +433,9 @@ struct cmd_nums { #define HWRM_STAT_GENERIC_QSTATS 0x218UL #define HWRM_MFG_PRVSN_EXPORT_CERT 0x219UL #define HWRM_STAT_DB_ERROR_QSTATS 0x21aUL + #define HWRM_MFG_TESTS 0x21bUL + #define HWRM_PORT_POE_CFG 0x230UL + #define HWRM_PORT_POE_QCFG 0x231UL #define HWRM_UDCC_QCAPS 0x258UL #define HWRM_UDCC_CFG 0x259UL #define HWRM_UDCC_QCFG 0x25aUL @@ -439,6 +445,9 @@ struct cmd_nums { #define HWRM_UDCC_COMP_CFG 0x25eUL #define HWRM_UDCC_COMP_QCFG 0x25fUL #define HWRM_UDCC_COMP_QUERY 0x260UL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x261UL + #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x262UL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x263UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -500,10 +509,8 @@ struct cmd_nums { #define HWRM_TFC_IF_TBL_GET 0x399UL #define HWRM_TFC_TBL_SCOPE_CONFIG_GET 0x39aUL #define HWRM_TFC_RESC_USAGE_QUERY 0x39bUL - #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x39cUL - #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x39dUL - #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x39eUL #define HWRM_SV 0x400UL + #define HWRM_DBG_SERDES_TEST 0xff0eUL #define HWRM_DBG_LOG_BUFFER_FLUSH 0xff0fUL #define HWRM_DBG_READ_DIRECT 0xff10UL #define HWRM_DBG_READ_INDIRECT 0xff11UL @@ -533,6 +540,9 @@ struct cmd_nums { #define HWRM_DBG_USEQ_RUN 0xff29UL #define HWRM_DBG_USEQ_DELIVERY_REQ 0xff2aUL #define HWRM_DBG_USEQ_RESP_HDR 0xff2bUL + #define HWRM_DBG_COREDUMP_CAPTURE 0xff2cUL + #define HWRM_DBG_PTRACE 0xff2dUL + #define HWRM_DBG_SIM_CABLE_STATE 0xff2eUL #define HWRM_NVM_GET_VPD_FIELD_INFO 0xffeaUL #define HWRM_NVM_SET_VPD_FIELD_INFO 0xffebUL #define HWRM_NVM_DEFRAG 0xffecUL @@ -582,6 +592,7 @@ struct ret_codes { #define 
HWRM_ERR_CODE_RESOURCE_LOCKED 0x11UL #define HWRM_ERR_CODE_PF_UNAVAILABLE 0x12UL #define HWRM_ERR_CODE_ENTITY_NOT_PRESENT 0x13UL + #define HWRM_ERR_CODE_SECURE_SOC_ERROR 0x14UL #define HWRM_ERR_CODE_TLV_ENCAPSULATED_RESPONSE 0x8000UL #define HWRM_ERR_CODE_UNKNOWN_ERR 0xfffeUL #define HWRM_ERR_CODE_CMD_NOT_SUPPORTED 0xffffUL @@ -613,8 +624,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 44 -#define HWRM_VERSION_STR "1.10.3.44" +#define HWRM_VERSION_RSVD 68 +#define HWRM_VERSION_STR "1.10.3.68" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -850,7 +861,10 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_UDCC_SESSION_CHANGE 0x4bUL #define ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER 0x4cUL #define ASYNC_EVENT_CMPL_EVENT_ID_PEER_MMAP_CHANGE 0x4dUL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x4eUL + #define ASYNC_EVENT_CMPL_EVENT_ID_REPRESENTOR_PAIR_CHANGE 0x4eUL + #define ASYNC_EVENT_CMPL_EVENT_ID_VF_STAT_CHANGE 0x4fUL + #define ASYNC_EVENT_CMPL_EVENT_ID_HOST_COREDUMP 0x50UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x51UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1691,7 +1705,7 @@ struct hwrm_func_qcaps_input { u8 unused_0[6]; }; -/* hwrm_func_qcaps_output (size:1088b/136B) */ +/* hwrm_func_qcaps_output (size:1152b/144B) */ struct hwrm_func_qcaps_output { __le16 error_code; __le16 req_type; @@ -1824,6 +1838,9 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_EGRESS_NIC_FLOW_SUPPORTED 0x4000000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_MULTI_LOSSLESS_QUEUES_SUPPORTED 0x8000000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_PEER_MMAP_SUPPORTED 0x10000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TIMED_TX_PACING_SUPPORTED 0x20000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_VF_STAT_EJECTION_SUPPORTED 0x40000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_HOST_COREDUMP_SUPPORTED 0x80000000UL __le16 tunnel_disable_flag; #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN 0x1UL #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_NGE 0x2UL @@ -1845,7 +1862,9 @@ struct hwrm_func_qcaps_output { __le32 roce_vf_max_qp; __le32 roce_vf_max_srq; __le32 roce_vf_max_gid; - u8 unused_3[3]; + __le32 flags_ext3; + #define FUNC_QCAPS_RESP_FLAGS_EXT3_RM_RSV_WHILE_ALLOC_CAP 0x1UL + u8 unused_3[7]; u8 valid; }; @@ -2021,7 +2040,8 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 __le16 host_mtu; - u8 unused_3[2]; + __le16 flags2; + #define FUNC_QCFG_RESP_FLAGS2_SRIOV_DSCP_INSERT_ENABLED 0x1UL u8 unused_4[2]; u8 port_kdnet_mode; #define FUNC_QCFG_RESP_PORT_KDNET_MODE_DISABLED 0x0UL @@ -3671,33 +3691,38 @@ struct hwrm_func_backing_store_cfg_v2_input { __le16 target_id; __le64 resp_addr; __le16 type; - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL - #define 
FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TX_CK 0x13UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RX_CK 0x14UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT_TRACE 0x20UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT2_TRACE 0x21UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP0_TRACE 0x22UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TTX_PACING_TQM_RING 0x25UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA0_TRACE 0x26UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA1_TRACE 0x27UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA2_TRACE 0x28UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID __le16 instance; __le32 flags; #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE 0x1UL @@ -3772,6 +3797,11 @@ struct hwrm_func_backing_store_qcfg_v2_input { #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP0_TRACE 0x22UL #define 
FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TTX_PACING_TQM_RING 0x25UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA0_TRACE 0x26UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA1_TRACE 0x27UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA2_TRACE 0x28UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3785,29 +3815,34 @@ struct hwrm_func_backing_store_qcfg_v2_output { __le16 seq_id; __le16 resp_len; __le16 type; - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TX_CK 0x13UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RX_CK 0x14UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT_TRACE 0x1eUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT2_TRACE 0x1fUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT_TRACE 0x20UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT2_TRACE 0x21UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP0_TRACE 0x22UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL + #define 
FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TTX_PACING_TQM_RING 0x25UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA0_TRACE 0x26UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA1_TRACE 0x27UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA2_TRACE 0x28UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP1_TRACE 0x29UL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID
 __le16 instance;
 __le32 flags;
 __le64 page_dir;
@@ -3883,6 +3918,13 @@ struct ts_split_entries {
 __le32 rsvd2[2];
 };
+/* ck_split_entries (size:128b/16B) */
+struct ck_split_entries {
+ __le32 num_quic_entries;
+ __le32 rsvd;
+ __le32 rsvd2[2];
+};
+
 /* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */
 struct hwrm_func_backing_store_qcaps_v2_input {
 __le16 req_type;
@@ -3891,33 +3933,38 @@ struct hwrm_func_backing_store_qcaps_v2_input {
 __le16 target_id;
 __le64 resp_addr;
 __le16 type;
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TX_CK 0x13UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RX_CK 0x14UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE 0x1eUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE 0x1fUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE 0x20UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT2_TRACE 0x21UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP0_TRACE 0x22UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL
- #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TX_CK 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RX_CK 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE 0x1eUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE 0x1fUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE 0x20UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT2_TRACE 0x21UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP0_TRACE 0x22UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TTX_PACING_TQM_RING 0x25UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA0_TRACE 0x26UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA1_TRACE 0x27UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE 0x28UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE 0x29UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
 u8 rsvd[6];
 };
@@ -3928,39 +3975,45 @@ struct hwrm_func_backing_store_qcaps_v2_output {
 __le16 seq_id;
 __le16 resp_len;
 __le16 type;
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TX_CK 0x13UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RX_CK 0x14UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT_TRACE 0x1eUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT2_TRACE 0x1fUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT_TRACE 0x20UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT2_TRACE 0x21UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP0_TRACE 0x22UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL
- #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TX_CK 0x13UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RX_CK 0x14UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT_TRACE 0x1eUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT2_TRACE 0x1fUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT_TRACE 0x20UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT2_TRACE 0x21UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP0_TRACE 0x22UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TTX_PACING_TQM_RING 0x25UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA0_TRACE 0x26UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA1_TRACE 0x27UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA2_TRACE 0x28UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP1_TRACE 0x29UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
 __le16 entry_size;
 __le32 flags;
 #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT 0x1UL
 #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID 0x2UL
 #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_DRIVER_MANAGED_MEMORY 0x4UL
 #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ROCE_QP_PSEUDO_STATIC_ALLOC 0x8UL
+ #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_FW_DBG_TRACE 0x10UL
 __le32 instance_bit_map;
 u8 ctx_init_value;
 u8 ctx_init_offset;
@@ -4410,6 +4463,7 @@ struct hwrm_port_phy_qcfg_output {
 #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN 0x3UL
 #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED 0x4UL
 #define PORT_PHY_QCFG_RESP_MODULE_STATUS_CURRENTFAULT 0x5UL
+ #define PORT_PHY_QCFG_RESP_MODULE_STATUS_OVERHEATED 0x6UL
 #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE 0xffUL
 #define PORT_PHY_QCFG_RESP_MODULE_STATUS_LAST PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE
 __le32 preemphasis;
@@ -4941,7 +4995,9 @@ struct hwrm_port_qstats_output {
 __le16 resp_len;
 __le16 tx_stat_size;
 __le16 rx_stat_size;
- u8 unused_0[3];
+ u8 flags;
+ #define PORT_QSTATS_RESP_FLAGS_CLEARED 0x1UL
+ u8 unused_0[2];
 u8 valid;
 };
@@ -5074,6 +5130,7 @@ struct hwrm_port_qstats_ext_output {
 __le16 total_active_cos_queues;
 u8 flags;
 #define PORT_QSTATS_EXT_RESP_FLAGS_CLEAR_ROCE_COUNTERS_SUPPORTED 0x1UL
+ #define PORT_QSTATS_EXT_RESP_FLAGS_CLEARED 0x2UL
 u8 valid;
 };
@@ -6510,6 +6567,43 @@ struct hwrm_vnic_alloc_output {
 u8 valid;
 };
+/* hwrm_vnic_update_input (size:256b/32B) */
+struct hwrm_vnic_update_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le32 vnic_id;
+ __le32 enables;
+ #define VNIC_UPDATE_REQ_ENABLES_VNIC_STATE_VALID 0x1UL
+ #define VNIC_UPDATE_REQ_ENABLES_MRU_VALID 0x2UL
+ #define VNIC_UPDATE_REQ_ENABLES_METADATA_FORMAT_TYPE_VALID 0x4UL
+ u8 vnic_state;
+ #define VNIC_UPDATE_REQ_VNIC_STATE_NORMAL 0x0UL
+ #define VNIC_UPDATE_REQ_VNIC_STATE_DROP 0x1UL
+ #define VNIC_UPDATE_REQ_VNIC_STATE_LAST VNIC_UPDATE_REQ_VNIC_STATE_DROP
+ u8 metadata_format_type;
+ #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_0 0x0UL
+ #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_1 0x1UL
+ #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_2 0x2UL
+ #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_3 0x3UL
+ #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_4 0x4UL
+ #define VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_LAST VNIC_UPDATE_REQ_METADATA_FORMAT_TYPE_4
+ __le16 mru;
+ u8 unused_1[4];
+};
+
+/* hwrm_vnic_update_output (size:128b/16B) */
+struct hwrm_vnic_update_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 unused_0[7];
+ u8 valid;
+};
+
 /* hwrm_vnic_free_input (size:192b/24B) */
 struct hwrm_vnic_free_input {
 __le16 req_type;
@@ -6640,6 +6734,7 @@ struct hwrm_vnic_qcaps_output {
 #define VNIC_QCAPS_RESP_FLAGS_RSS_PROF_TCAM_MODE_ENABLED 0x8000000UL
 #define VNIC_QCAPS_RESP_FLAGS_VNIC_RSS_HASH_MODE_CAP 0x10000000UL
 #define VNIC_QCAPS_RESP_FLAGS_HW_TUNNEL_TPA_CAP 0x20000000UL
+ #define VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP 0x40000000UL
 __le16 max_aggs_supported;
 u8 unused_1[5];
 u8 valid;
@@ -7484,23 +7579,24 @@ struct hwrm_cfa_l2_filter_cfg_input {
 __le16 target_id;
 __le64 resp_addr;
 __le32 flags;
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_MASK 0x30UL
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_SFT 4
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_NO_UPDATE (0x0UL << 4)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_BYPASS_LKUP (0x1UL << 4)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_ENABLE_LKUP (0x2UL << 4)
- #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_ENABLE_LKUP
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_MASK 0x30UL
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_SFT 4
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_NO_UPDATE (0x0UL << 4)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_BYPASS_LKUP (0x1UL << 4)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_ENABLE_LKUP (0x2UL << 4)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_RESTORE_FW_OP (0x3UL << 4)
+ #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_RESTORE_FW_OP
 __le32 enables;
 #define CFA_L2_FILTER_CFG_REQ_ENABLES_DST_ID 0x1UL
 #define CFA_L2_FILTER_CFG_REQ_ENABLES_NEW_MIRROR_VNIC_ID 0x2UL
@@ -8766,7 +8862,7 @@ struct ctx_hw_stats_ext {
 __le64 rx_tpa_events;
 };
-/* hwrm_stat_ctx_alloc_input (size:320b/40B) */
+/* hwrm_stat_ctx_alloc_input (size:384b/48B) */
 struct hwrm_stat_ctx_alloc_input {
 __le16 req_type;
 __le16 cmpl_ring;
@@ -8776,13 +8872,16 @@ struct hwrm_stat_ctx_alloc_input {
 __le64 stats_dma_addr;
 __le32 update_period_ms;
 u8 stat_ctx_flags;
- #define STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE 0x1UL
+ #define STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE 0x1UL
+ #define STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_DUP_HOST_BUF 0x2UL
 u8 unused_0;
 __le16 stats_dma_length;
 __le16 flags;
 #define STAT_CTX_ALLOC_REQ_FLAGS_STEERING_TAG_VALID 0x1UL
 __le16 steering_tag;
- __le32 unused_1;
+ __le32 stat_ctx_id;
+ __le16 alloc_seq_id;
+ u8 unused_1[6];
 };
 /* hwrm_stat_ctx_alloc_output (size:128b/16B) */
@@ -9650,10 +9749,13 @@ struct hwrm_dbg_qcaps_output {
 __le32 coredump_component_disable_caps;
 #define DBG_QCAPS_RESP_COREDUMP_COMPONENT_DISABLE_CAPS_NVRAM 0x1UL
 __le32 flags;
- #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_NVM 0x1UL
- #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR 0x2UL
- #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR 0x4UL
- #define DBG_QCAPS_RESP_FLAGS_USEQ 0x8UL
+ #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_NVM 0x1UL
+ #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR 0x2UL
+ #define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR 0x4UL
+ #define DBG_QCAPS_RESP_FLAGS_USEQ 0x8UL
+ #define DBG_QCAPS_RESP_FLAGS_COREDUMP_HOST_DDR 0x10UL
+ #define DBG_QCAPS_RESP_FLAGS_COREDUMP_HOST_CAPTURE 0x20UL
+ #define DBG_QCAPS_RESP_FLAGS_PTRACE 0x40UL
 u8 unused_1[3];
 u8 valid;
 };
@@ -10092,16 +10194,19 @@ struct hwrm_nvm_erase_dir_entry_output {
 u8 valid;
 };
-/* hwrm_nvm_get_dev_info_input (size:128b/16B) */
+/* hwrm_nvm_get_dev_info_input (size:192b/24B) */
 struct hwrm_nvm_get_dev_info_input {
 __le16 req_type;
 __le16 cmpl_ring;
 __le16 seq_id;
 __le16 target_id;
 __le64 resp_addr;
+ u8 flags;
+ #define NVM_GET_DEV_INFO_REQ_FLAGS_SECURITY_SOC_NVM 0x1UL
+ u8 unused_0[7];
 };
-/* hwrm_nvm_get_dev_info_output (size:704b/88B) */
+/* hwrm_nvm_get_dev_info_output (size:768b/96B) */
 struct hwrm_nvm_get_dev_info_output {
 __le16 error_code;
 __le16 req_type;
@@ -10135,6 +10240,10 @@ struct hwrm_nvm_get_dev_info_output {
 __le16 netctrl_fw_minor;
 __le16 netctrl_fw_build;
 __le16 netctrl_fw_patch;
+ __le16 srt2_fw_major;
+ __le16 srt2_fw_minor;
+ __le16 srt2_fw_build;
+ __le16 srt2_fw_patch;
 u8 unused_0[7];
 u8 valid;
 };
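A minimal sketch of how a driver consumes flag bits like the new VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP added above: HWRM response fields are little-endian on the wire and must be converted before testing. The helper name is hypothetical; the struct and flag come from bnxt_hsi.h.

/* --- example (hypothetical helper, real bnxt_hsi.h definitions) --- */
#include <linux/types.h>
#include <asm/byteorder.h>

static bool bnxt_fw_has_re_flush(const struct hwrm_vnic_qcaps_output *resp)
{
	/* convert the __le32 flags word before masking */
	u32 flags = le32_to_cpu(resp->flags);

	return !!(flags & VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP);
}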
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 22898d3d088b..7bb8a5d74430 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -15,6 +15,7 @@
 #include <linux/if_vlan.h>
 #include <linux/interrupt.h>
 #include <linux/etherdevice.h>
+#include <net/dcbnl.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_hwrm.h"
@@ -196,11 +197,8 @@ int bnxt_get_vf_config(struct net_device *dev, int vf_id,
 memcpy(&ivi->mac, vf->vf_mac_addr, ETH_ALEN);
 ivi->max_tx_rate = vf->max_tx_rate;
 ivi->min_tx_rate = vf->min_tx_rate;
- ivi->vlan = vf->vlan;
- if (vf->flags & BNXT_VF_QOS)
- ivi->qos = vf->vlan >> VLAN_PRIO_SHIFT;
- else
- ivi->qos = 0;
+ ivi->vlan = vf->vlan & VLAN_VID_MASK;
+ ivi->qos = vf->vlan >> VLAN_PRIO_SHIFT;
 ivi->spoofchk = !!(vf->flags & BNXT_VF_SPOOFCHK);
 ivi->trusted = bnxt_is_trusted_vf(bp, vf);
 if (!(vf->flags & BNXT_VF_LINK_FORCED))
@@ -256,21 +254,21 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
 if (bp->hwrm_spec_code < 0x10201)
 return -ENOTSUPP;
- if (vlan_proto != htons(ETH_P_8021Q))
+ if (vlan_proto != htons(ETH_P_8021Q) &&
+ (vlan_proto != htons(ETH_P_8021AD) ||
+ !(bp->fw_cap & BNXT_FW_CAP_DFLT_VLAN_TPID_PCP)))
 return -EPROTONOSUPPORT;
 rc = bnxt_vf_ndo_prep(bp, vf_id);
 if (rc)
 return rc;
- /* TODO: needed to implement proper handling of user priority,
- * currently fail the command if there is valid priority
- */
- if (vlan_id > 4095 || qos)
+ if (vlan_id >= VLAN_N_VID || qos >= IEEE_8021Q_MAX_PRIORITIES ||
+ (!vlan_id && qos))
 return -EINVAL;
 vf = &bp->pf.vf[vf_id];
- vlan_tag = vlan_id;
+ vlan_tag = vlan_id | (u16)qos << VLAN_PRIO_SHIFT;
 if (vlan_tag == vf->vlan)
 return 0;
@@ -279,6 +277,10 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
 req->fid = cpu_to_le16(vf->fw_fid);
 req->dflt_vlan = cpu_to_le16(vlan_tag);
 req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
+ if (bp->fw_cap & BNXT_FW_CAP_DFLT_VLAN_TPID_PCP) {
+ req->enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_TPID);
+ req->tpid = vlan_proto;
+ }
 rc = hwrm_req_send(bp, req);
 if (!rc)
 vf->vlan = vlan_tag;
@@ -900,11 +902,6 @@ int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs)
 struct net_device *dev = pci_get_drvdata(pdev);
 struct bnxt *bp = netdev_priv(dev);
- if (!(bp->flags & BNXT_FLAG_USING_MSIX)) {
- netdev_warn(dev, "Not allow SRIOV if the irq mode is not MSIX\n");
- return 0;
- }
 rtnl_lock();
 if (!netif_running(dev)) {
 netdev_warn(dev, "Reject SRIOV config request since if is down!\n");
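A sketch of the VLAN TCI packing the hunk above relies on: the 12-bit VID and the 3-bit 802.1p priority share one 16-bit tag, which is also why the new validation rejects out-of-range values and a priority with no VID. check_vf_vlan_qos() is a hypothetical stand-in for the ndo_set_vf_vlan checks; the macros are the standard ones from if_vlan.h and dcbnl.h.

/* --- example (hypothetical helper, standard kernel macros) --- */
#include <linux/if_vlan.h>
#include <net/dcbnl.h>
#include <linux/errno.h>

static int check_vf_vlan_qos(u16 vlan_id, u8 qos, u16 *tag)
{
	if (vlan_id >= VLAN_N_VID || qos >= IEEE_8021Q_MAX_PRIORITIES)
		return -EINVAL;
	if (!vlan_id && qos)		/* priority without a VID is meaningless */
		return -EINVAL;

	*tag = vlan_id | (u16)qos << VLAN_PRIO_SHIFT;
	return 0;
}

/* unpacking mirrors bnxt_get_vf_config() above:
 *   vid = tag & VLAN_VID_MASK;  prio = tag >> VLAN_PRIO_SHIFT;
 */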
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index b9e7d3e7b15d..fdd6356f21ef 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -176,11 +176,17 @@ EXPORT_SYMBOL(bnxt_unregister_dev);
 static int bnxt_set_dflt_ulp_msix(struct bnxt *bp)
 {
- u32 roce_msix = BNXT_VF(bp) ?
- BNXT_MAX_VF_ROCE_MSIX : BNXT_MAX_ROCE_MSIX;
+ int roce_msix = BNXT_MAX_ROCE_MSIX;
- return ((bp->flags & BNXT_FLAG_ROCE_CAP) ?
- min_t(u32, roce_msix, num_online_cpus()) : 0);
+ if (BNXT_VF(bp))
+ roce_msix = BNXT_MAX_ROCE_MSIX_VF;
+ else if (bp->port_partition_type)
+ roce_msix = BNXT_MAX_ROCE_MSIX_NPAR_PF;
+
+ /* NQ MSIX vectors should match the number of CPUs plus 1 more for
+ * the CREQ MSIX, up to the default.
+ */
+ return min_t(int, roce_msix, num_online_cpus() + 1);
 }
 int bnxt_send_msg(struct bnxt_en_dev *edev,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index 4eafe6ec0abf..4f4914f5c84c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -15,8 +15,10 @@
 #define BNXT_MIN_ROCE_CP_RINGS 2
 #define BNXT_MIN_ROCE_STAT_CTXS 1
-#define BNXT_MAX_ROCE_MSIX 9
-#define BNXT_MAX_VF_ROCE_MSIX 2
+
+#define BNXT_MAX_ROCE_MSIX_VF 2
+#define BNXT_MAX_ROCE_MSIX_NPAR_PF 5
+#define BNXT_MAX_ROCE_MSIX 64
 struct hwrm_async_event_cmpl;
 struct bnxt;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 345681d5007e..f88b641533fc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -297,11 +297,6 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 * redirect is coming from a frame received by the
 * bnxt_en driver.
 */
- rx_buf = &rxr->rx_buf_ring[cons];
- mapping = rx_buf->mapping - bp->rx_dma_offset;
- dma_unmap_page_attrs(&pdev->dev, mapping,
- BNXT_RX_PAGE_SIZE, bp->rx_dir,
- DMA_ATTR_WEAK_ORDERING);
 /* if we are unable to allocate a new buffer, abort and reuse */
 if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) {
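The MSIX sizing rule in the bnxt_ulp.c hunk above, reduced to its essentials: one notification queue per online CPU plus one vector for the CREQ, clamped to a per-function ceiling. A minimal sketch with the ceiling passed as a plain parameter (the BNXT_MAX_* selection logic is shown in the real diff):

/* --- example (simplified sketch of the clamping rule) --- */
#include <linux/cpumask.h>
#include <linux/minmax.h>

static int roce_msix_wanted(int ceiling)
{
	/* CPUs + 1 CREQ vector, but never more than the ceiling */
	return min_t(int, ceiling, num_online_cpus() + 1);
}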
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index c2b4188a1ef1..a9040c42d2ff 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -31,6 +31,7 @@
 #include <linux/if_vlan.h>
 #include <linux/prefetch.h>
 #include <linux/random.h>
+#include <linux/workqueue.h>
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define BCM_VLAN 1
 #endif
@@ -3015,9 +3016,9 @@ static int cnic_service_bnx2(void *data, void *status_blk)
 return cnic_service_bnx2_queues(dev);
 }
-static void cnic_service_bnx2_msix(struct tasklet_struct *t)
+static void cnic_service_bnx2_msix(struct work_struct *work)
 {
- struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task);
+ struct cnic_local *cp = from_work(cp, work, cnic_irq_bh_work);
 struct cnic_dev *dev = cp->dev;
 cp->last_status_idx = cnic_service_bnx2_queues(dev);
@@ -3036,7 +3037,7 @@ static void cnic_doirq(struct cnic_dev *dev)
 prefetch(cp->status_blk.gen);
 prefetch(&cp->kcq1.kcq[KCQ_PG(prod)][KCQ_IDX(prod)]);
- tasklet_schedule(&cp->cnic_irq_task);
+ queue_work(system_bh_wq, &cp->cnic_irq_bh_work);
 }
 }
@@ -3140,9 +3141,9 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
 return last_status;
 }
-static void cnic_service_bnx2x_bh(struct tasklet_struct *t)
+static void cnic_service_bnx2x_bh_work(struct work_struct *work)
 {
- struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task);
+ struct cnic_local *cp = from_work(cp, work, cnic_irq_bh_work);
 struct cnic_dev *dev = cp->dev;
 struct bnx2x *bp = netdev_priv(dev->netdev);
 u32 status_idx, new_status_idx;
@@ -4428,7 +4429,7 @@ static void cnic_free_irq(struct cnic_dev *dev)
 if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
 cp->disable_int_sync(dev);
- tasklet_kill(&cp->cnic_irq_task);
+ cancel_work_sync(&cp->cnic_irq_bh_work);
 free_irq(ethdev->irq_arr[0].vector, dev);
 }
 }
@@ -4441,7 +4442,7 @@ static int cnic_request_irq(struct cnic_dev *dev)
 err = request_irq(ethdev->irq_arr[0].vector, cnic_irq, 0, "cnic", dev);
 if (err)
- tasklet_disable(&cp->cnic_irq_task);
+ disable_work_sync(&cp->cnic_irq_bh_work);
 return err;
 }
@@ -4464,7 +4465,7 @@ static int cnic_init_bnx2_irq(struct cnic_dev *dev)
 CNIC_WR(dev, base + BNX2_HC_CMD_TICKS_OFF, (64 << 16) | 220);
 cp->last_status_idx = cp->status_blk.bnx2->status_idx;
- tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2_msix);
+ INIT_WORK(&cp->cnic_irq_bh_work, cnic_service_bnx2_msix);
 err = cnic_request_irq(dev);
 if (err)
 return err;
@@ -4873,7 +4874,7 @@ static int cnic_init_bnx2x_irq(struct cnic_dev *dev)
 struct cnic_eth_dev *ethdev = cp->ethdev;
 int err = 0;
- tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2x_bh);
+ INIT_WORK(&cp->cnic_irq_bh_work, cnic_service_bnx2x_bh_work);
 if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX)
 err = cnic_request_irq(dev);
diff --git a/drivers/net/ethernet/broadcom/cnic.h b/drivers/net/ethernet/broadcom/cnic.h
index fedc84ada937..1a314a75d2d2 100644
--- a/drivers/net/ethernet/broadcom/cnic.h
+++ b/drivers/net/ethernet/broadcom/cnic.h
@@ -268,7 +268,7 @@ struct cnic_local {
 u32 bnx2x_igu_sb_id;
 u32 int_num;
 u32 last_status_idx;
- struct tasklet_struct cnic_irq_task;
+ struct work_struct cnic_irq_bh_work;
 struct kcqe *completed_kcq[MAX_COMPLETED_KCQE];
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index 1248792d7fd4..0715ea5bf13e 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -42,19 +42,15 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 struct bcmgenet_priv *priv = netdev_priv(dev);
 struct device *kdev = &priv->pdev->dev;
- if (dev->phydev) {
+ if (dev->phydev)
 phy_ethtool_get_wol(dev->phydev, wol);
- if (wol->supported)
- return;
- }
- if (!device_can_wakeup(kdev)) {
- wol->supported = 0;
- wol->wolopts = 0;
+ /* MAC is not wake-up capable, return what the PHY does */
+ if (!device_can_wakeup(kdev))
 return;
- }
- wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
+ /* Overlay MAC capabilities with that of the PHY queried before */
+ wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
 wol->wolopts = priv->wolopts;
 memset(wol->sopass, 0, sizeof(wol->sopass));
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 0ec5f01551f9..378815917741 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6145,9 +6145,7 @@ static int tg3_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info
 {
 struct tg3 *tp = netdev_priv(dev);
- info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
+ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE;
 if (tg3_flag(tp, PTP_CAPABLE)) {
 info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE |
@@ -6157,8 +6155,6 @@ static int tg3_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info
 if (tp->ptp_clock)
 info->phc_index = ptp_clock_index(tp->ptp_clock);
- else
- info->phc_index = -1;
 info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
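The cnic hunks above follow the generic tasklet-to-BH-workqueue conversion shape, using the system_bh_wq and from_work() facilities that this series depends on. A minimal self-contained sketch with a stand-in device structure:

/* --- example (sketch; my_dev is hypothetical, APIs as used above) --- */
#include <linux/workqueue.h>

struct my_dev {
	struct work_struct irq_bh_work;	/* was: struct tasklet_struct */
	/* ... device state ... */
};

static void my_dev_bh_work(struct work_struct *work)
{
	struct my_dev *dev = from_work(dev, work, irq_bh_work);

	/* bottom-half processing formerly done in the tasklet body;
	 * 'dev' is recovered from the embedded work_struct.
	 */
	(void)dev;
}

static void my_dev_setup(struct my_dev *dev)
{
	INIT_WORK(&dev->irq_bh_work, my_dev_bh_work);	/* was tasklet_setup() */
}

static void my_dev_kick(struct my_dev *dev)
{
	/* runs in BH context, like the tasklet it replaces */
	queue_work(system_bh_wq, &dev->irq_bh_work);	/* was tasklet_schedule() */
}

static void my_dev_teardown(struct my_dev *dev)
{
	cancel_work_sync(&dev->irq_bh_work);		/* was tasklet_kill() */
}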
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index ea71612f6b36..5740c98d8c9f 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -13,6 +13,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/interrupt.h>
 #include <linux/phy/phy.h>
+#include <linux/workqueue.h>
 #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP)
 #define MACB_EXT_DESC
@@ -1330,7 +1331,7 @@ struct macb {
 spinlock_t rx_fs_lock;
 unsigned int max_tuples;
- struct tasklet_struct hresp_err_tasklet;
+ struct work_struct hresp_err_bh_work;
 int rx_bd_rd_prefetch;
 int tx_bd_rd_prefetch;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 11665be3a22c..f06babec04a0 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1792,9 +1792,9 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 return work_done;
 }
-static void macb_hresp_error_task(struct tasklet_struct *t)
+static void macb_hresp_error_task(struct work_struct *work)
 {
- struct macb *bp = from_tasklet(bp, t, hresp_err_tasklet);
+ struct macb *bp = from_work(bp, work, hresp_err_bh_work);
 struct net_device *dev = bp->dev;
 struct macb_queue *queue;
 unsigned int q;
@@ -1994,7 +1994,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 }
 if (status & MACB_BIT(HRESP)) {
- tasklet_schedule(&bp->hresp_err_tasklet);
+ queue_work(system_bh_wq, &bp->hresp_err_bh_work);
 netdev_err(dev, "DMA bus error: HRESP not OK\n");
 if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
@@ -3410,8 +3410,6 @@ static int gem_get_ts_info(struct net_device *dev,
 info->so_timestamping =
 SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE |
 SOF_TIMESTAMPING_TX_HARDWARE |
 SOF_TIMESTAMPING_RX_HARDWARE |
 SOF_TIMESTAMPING_RAW_HARDWARE;
@@ -3423,7 +3421,8 @@ static int gem_get_ts_info(struct net_device *dev,
 (1 << HWTSTAMP_FILTER_NONE) |
 (1 << HWTSTAMP_FILTER_ALL);
- info->phc_index = bp->ptp_clock ? ptp_clock_index(bp->ptp_clock) : -1;
+ if (bp->ptp_clock)
+ info->phc_index = ptp_clock_index(bp->ptp_clock);
 return 0;
 }
@@ -4184,6 +4183,8 @@ static int macb_init(struct platform_device *pdev)
 dev->ethtool_ops = &macb_ethtool_ops;
 }
+ netdev_sw_irq_coalesce_default_on(dev);
+
 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 /* Set features */
@@ -5119,12 +5120,12 @@ static int macb_probe(struct platform_device *pdev)
 goto err_out_free_netdev;
 }
- /* MTU range: 68 - 1500 or 10240 */
+ /* MTU range: 68 - 1518 or 10240 */
 dev->min_mtu = GEM_MTU_MIN_SIZE;
 if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len)
 dev->max_mtu = bp->jumbo_max_len - ETH_HLEN - ETH_FCS_LEN;
 else
- dev->max_mtu = ETH_DATA_LEN;
+ dev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN;
 if (bp->caps & MACB_CAPS_BD_RD_PREFETCH) {
 val = GEM_BFEXT(RXBD_RDBUFF, gem_readl(bp, DCFG10));
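The gem_get_ts_info() hunk above (like the tg3 and nicvf ones) drops the software-timestamping bits and the "else phc_index = -1" fallback. The pattern these conversions converge on, assuming the ethtool core now pre-initializes phc_index to -1 and supplies the software RX/reporting defaults, looks roughly like this (my_priv/my_get_ts_info are stand-ins):

/* --- example (sketch; assumes core-provided ts_info defaults) --- */
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/ptp_clock_kernel.h>

struct my_priv {
	struct ptp_clock *ptp_clock;
};

static int my_get_ts_info(struct net_device *dev,
			  struct kernel_ethtool_ts_info *info)
{
	struct my_priv *priv = netdev_priv(dev);

	/* only declare what the driver adds on top of the core defaults */
	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
				SOF_TIMESTAMPING_TX_HARDWARE |
				SOF_TIMESTAMPING_RX_HARDWARE |
				SOF_TIMESTAMPING_RAW_HARDWARE;
	if (priv->ptp_clock)	/* otherwise leave the core's -1 in place */
		info->phc_index = ptp_clock_index(priv->ptp_clock);
	return 0;
}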
"GEM" : "MACB", macb_readl(bp, MID), @@ -5216,7 +5217,7 @@ static void macb_remove(struct platform_device *pdev) mdiobus_free(bp->mii_bus); unregister_netdev(dev); - tasklet_kill(&bp->hresp_err_tasklet); + cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); if (!pm_runtime_suspended(&pdev->dev)) { @@ -5250,8 +5251,8 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wol & MACB_WOL_ENABLED) { /* Check for IP address in WOL ARP mode */ idev = __in_dev_get_rcu(bp->dev); - if (idev && idev->ifa_list) - ifa = rcu_access_pointer(idev->ifa_list); + if (idev) + ifa = rcu_dereference(idev->ifa_list); if ((bp->wolopts & WAKE_ARP) && !ifa) { netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index f66d22de5168..fc4f5aee6ab3 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -19,8 +19,7 @@ #define PCI_DRIVER_NAME "macb_pci" #define PLAT_DRIVER_NAME "macb" -#define CDNS_VENDOR_ID 0x17cd -#define CDNS_DEVICE_ID 0xe007 +#define PCI_DEVICE_ID_CDNS_MACB 0xe007 #define GEM_PCLK_RATE 50000000 #define GEM_HCLK_RATE 50000000 @@ -117,7 +116,7 @@ static void macb_remove(struct pci_dev *pdev) } static const struct pci_device_id dev_id_table[] = { - { PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), }, + { PCI_VDEVICE(CDNS, PCI_DEVICE_ID_CDNS_MACB) }, { 0, } }; diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h index 2d06097d3f61..40f529d0bc4c 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h @@ -43,6 +43,4 @@ int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct); int cn23xx_setup_octeon_vf_device(struct octeon_device *oct); u32 cn23xx_vf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us); - -void cn23xx_dump_vf_initialized_regs(struct octeon_device *oct); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h index 8ed57134ee0c..129c8b84f549 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h @@ -86,7 +86,6 @@ u32 lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq); void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, u8 unused); void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, u8 unused); -void cn6xxx_get_pcie_qlmport(struct octeon_device *oct); void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip, struct octeon_reg_list *reg_list); u32 lio_cn6xxx_coprocessor_clock(struct octeon_device *oct); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 5835965dbc32..c849e2c871a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -2496,37 +2496,31 @@ ret_intrmod: return ret; } +#ifdef PTP_HARDWARE_TIMESTAMPING static int lio_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { struct lio *lio = GET_LIO(netdev); info->so_timestamping = -#ifdef PTP_HARDWARE_TIMESTAMPING SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE | - SOF_TIMESTAMPING_TX_SOFTWARE | -#endif - SOF_TIMESTAMPING_RX_SOFTWARE 
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
index 2d06097d3f61..40f529d0bc4c 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
@@ -43,6 +43,4 @@ int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct);
 int cn23xx_setup_octeon_vf_device(struct octeon_device *oct);
 u32 cn23xx_vf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
-
-void cn23xx_dump_vf_initialized_regs(struct octeon_device *oct);
 #endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
index 8ed57134ee0c..129c8b84f549 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
@@ -86,7 +86,6 @@ u32 lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq);
 void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, u8 unused);
 void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, u8 unused);
-void cn6xxx_get_pcie_qlmport(struct octeon_device *oct);
 void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip,
 struct octeon_reg_list *reg_list);
 u32 lio_cn6xxx_coprocessor_clock(struct octeon_device *oct);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 5835965dbc32..c849e2c871a9 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -2496,37 +2496,31 @@ ret_intrmod:
 return ret;
 }
+#ifdef PTP_HARDWARE_TIMESTAMPING
 static int lio_get_ts_info(struct net_device *netdev,
 struct kernel_ethtool_ts_info *info)
 {
 struct lio *lio = GET_LIO(netdev);
 info->so_timestamping =
-#ifdef PTP_HARDWARE_TIMESTAMPING
 SOF_TIMESTAMPING_TX_HARDWARE |
 SOF_TIMESTAMPING_RX_HARDWARE |
 SOF_TIMESTAMPING_RAW_HARDWARE |
- SOF_TIMESTAMPING_TX_SOFTWARE |
-#endif
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
+ SOF_TIMESTAMPING_TX_SOFTWARE;
 if (lio->ptp_clock)
 info->phc_index = ptp_clock_index(lio->ptp_clock);
- else
- info->phc_index = -1;
-#ifdef PTP_HARDWARE_TIMESTAMPING
 info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
 info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
 (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
 (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
 (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
-#endif
 return 0;
 }
+#endif
 /* Return register dump len. */
 static int lio_get_regs_len(struct net_device *dev)
@@ -3146,7 +3140,9 @@ static const struct ethtool_ops lio_ethtool_ops = {
 .set_coalesce = lio_set_intr_coalesce,
 .get_priv_flags = lio_get_priv_flags,
 .set_priv_flags = lio_set_priv_flags,
+#ifdef PTP_HARDWARE_TIMESTAMPING
 .get_ts_info = lio_get_ts_info,
+#endif
 };
 static const struct ethtool_ops lio_vf_ethtool_ops = {
@@ -3169,7 +3165,9 @@ static const struct ethtool_ops lio_vf_ethtool_ops = {
 .set_coalesce = lio_set_intr_coalesce,
 .get_priv_flags = lio_get_priv_flags,
 .set_priv_flags = lio_set_priv_flags,
+#ifdef PTP_HARDWARE_TIMESTAMPING
 .get_ts_info = lio_get_ts_info,
+#endif
 };
 void liquidio_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index fb380b4f3e02..d26364c2ac81 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -804,13 +804,6 @@ int octeon_init_consoles(struct octeon_device *oct);
 int octeon_add_console(struct octeon_device *oct, u32 console_num,
 char *dbg_enb);
-/** write or read from a console */
-int octeon_console_write(struct octeon_device *oct, u32 console_num,
- char *buffer, u32 write_request_size, u32 flags);
-int octeon_console_write_avail(struct octeon_device *oct, u32 console_num);
-
-int octeon_console_read_avail(struct octeon_device *oct, u32 console_num);
-
 /** Removes all attached consoles. */
 void octeon_remove_consoles(struct octeon_device *oct);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index c9b19e624dce..232ae72c0e37 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -395,8 +395,6 @@ int octeon_register_dispatch_fn(struct octeon_device *oct,
 void *octeon_get_dispatch_arg(struct octeon_device *oct,
 u16 opcode, u16 subcode);
-void octeon_droq_print_stats(void);
-
 u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq);
 int octeon_create_droq(struct octeon_device *oct, u32 q_no,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index bebf3bd349c6..a04f36a0e1a0 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -378,9 +378,6 @@ int octeon_send_command(struct octeon_device *oct, u32 iq_no,
 u32 force_db, void *cmd, void *buf,
 u32 datasize, u32 reqtype);
-void octeon_dump_soft_command(struct octeon_device *oct,
- struct octeon_soft_command *sc);
-
 void octeon_prepare_soft_command(struct octeon_device *oct,
 struct octeon_soft_command *sc,
 u8 opcode, u8 subcode,
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index 6a04d2530176..d0ff0c170b1a 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -844,8 +844,6 @@ static int nicvf_get_ts_info(struct net_device *netdev,
 return ethtool_op_get_ts_info(netdev, info);
 info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE |
 SOF_TIMESTAMPING_TX_HARDWARE |
 SOF_TIMESTAMPING_RX_HARDWARE |
 SOF_TIMESTAMPING_RAW_HARDWARE;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 8453defc296c..b7531041c56d 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -359,8 +359,6 @@ int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx);
 /* Register access APIs */
 void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val);
 u64 nicvf_reg_read(struct nicvf *nic, u64 offset);
-void nicvf_qset_reg_write(struct nicvf *nic, u64 offset, u64 val);
-u64 nicvf_qset_reg_read(struct nicvf *nic, u64 offset);
 void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
 u64 qidx, u64 val);
 u64 nicvf_queue_reg_read(struct nicvf *nic,
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index a40c266c37f2..608cc6af5af1 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1054,18 +1054,12 @@ static int phy_interface_mode(u8 lmac_type)
 static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 {
- struct lmac *lmac, **priv;
+ struct lmac *lmac;
 u64 cfg;
 lmac = &bgx->lmac[lmacid];
 lmac->bgx = bgx;
- lmac->netdev = alloc_netdev_dummy(sizeof(struct lmac *));
- if (!lmac->netdev)
- return -ENOMEM;
- priv = netdev_priv(lmac->netdev);
- *priv = lmac;
-
 if ((lmac->lmac_type == BGX_MODE_SGMII) ||
 (lmac->lmac_type == BGX_MODE_QSGMII) ||
 (lmac->lmac_type == BGX_MODE_RGMII)) {
@@ -1191,7 +1185,6 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
 phy_disconnect(lmac->phydev);
- free_netdev(lmac->netdev);
 lmac->phydev = NULL;
 }
@@ -1653,6 +1646,23 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 bgx_get_qlm_mode(bgx);
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+ struct lmac *lmacp, **priv;
+
+ lmacp = &bgx->lmac[lmac];
+ lmacp->netdev = alloc_netdev_dummy(sizeof(struct lmac *));
+
+ if (!lmacp->netdev) {
+ for (int i = 0; i < lmac; i++)
+ free_netdev(bgx->lmac[i].netdev);
+ err = -ENOMEM;
+ goto err_enable;
+ }
+
+ priv = netdev_priv(lmacp->netdev);
+ *priv = lmacp;
+ }
+
 err = bgx_init_phy(bgx);
 if (err)
 goto err_enable;
@@ -1692,8 +1702,10 @@ static void bgx_remove(struct pci_dev *pdev)
 u8 lmac;
 /* Disable all LMACs */
- for (lmac = 0; lmac < bgx->lmac_count; lmac++)
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
 bgx_lmac_disable(bgx, lmac);
+ free_netdev(bgx->lmac[lmac].netdev);
+ }
 pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index cdea49392185..84f16ababaee 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -219,9 +219,7 @@ void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid, u64 mac, u8 vf);
 void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf);
 void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode);
-void octeon_mdiobus_force_mod_depencency(void);
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
-void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
 unsigned bgx_get_map(int node);
 int bgx_get_lmac_count(int node, int bgx);
 const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid);
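The bgx_probe() hunk above moves the dummy-netdev allocation out of bgx_lmac_enable() so that a mid-loop failure can unwind the allocations that already succeeded. The same allocate-all-then-unwind shape in isolation (helper and parameters are stand-ins):

/* --- example (sketch of the unwind pattern used in bgx_probe) --- */
#include <linux/netdevice.h>
#include <linux/errno.h>

static int alloc_dummy_netdevs(struct net_device **devs, int count,
			       int priv_sz)
{
	int i;

	for (i = 0; i < count; i++) {
		devs[i] = alloc_netdev_dummy(priv_sz);
		if (!devs[i]) {
			/* free only the entries allocated so far */
			while (--i >= 0)
				free_netdev(devs[i]);
			return -ENOMEM;
		}
	}
	return 0;
}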
diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h
index e56eff701395..304bb282ab03 100644
--- a/drivers/net/ethernet/chelsio/cxgb/common.h
+++ b/drivers/net/ethernet/chelsio/cxgb/common.h
@@ -329,8 +329,6 @@ irqreturn_t t1_slow_intr_handler(adapter_t *adapter);
 int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc);
 const struct board_info *t1_get_board_info(unsigned int board_id);
-const struct board_info *t1_get_board_info_from_ids(unsigned int devid,
- unsigned short ssid);
 int t1_seeprom_read(adapter_t *adapter, u32 addr, __le32 *data);
 int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi,
 struct adapter_params *p);
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 7d7d3e0098df..3b7068832f95 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -1034,7 +1034,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 netdev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
 netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM |
- NETIF_F_RXCSUM | NETIF_F_LLTX | NETIF_F_HIGHDMA;
+ NETIF_F_RXCSUM | NETIF_F_HIGHDMA;
+ netdev->lltx = true;
 if (vlan_tso_capable(adapter)) {
 netdev->features |=
diff --git a/drivers/net/ethernet/chelsio/cxgb/tp.h b/drivers/net/ethernet/chelsio/cxgb/tp.h
index ba15675d56df..64f93dcc676b 100644
--- a/drivers/net/ethernet/chelsio/cxgb/tp.h
+++ b/drivers/net/ethernet/chelsio/cxgb/tp.h
@@ -65,9 +65,7 @@ void t1_tp_intr_enable(struct petp *tp);
 void t1_tp_intr_clear(struct petp *tp);
 int t1_tp_intr_handler(struct petp *tp);
-void t1_tp_get_mib_statistics(adapter_t *adap, struct tp_mib_statistics *tps);
 void t1_tp_set_tcp_checksum_offload(struct petp *tp, int enable);
 void t1_tp_set_ip_checksum_offload(struct petp *tp, int enable);
-int t1_tp_set_coalescing_size(struct petp *tp, unsigned int size);
 int t1_tp_reset(struct petp *tp, struct tp_params *p, unsigned int tp_clk);
 #endif
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h
index f04e81f33795..a08fc762a438 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h
@@ -106,6 +106,4 @@ static inline struct t3c_tid_entry *lookup_atid(const struct tid_info *t,
 return &e->t3c_tid;
 }
-int attach_t3cdev(struct t3cdev *dev);
-void detach_t3cdev(struct t3cdev *dev);
 #endif
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index fca9533bc011..bbf7641a0fc7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1958,11 +1958,6 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf);
 void t4_get_chan_txrate(struct adapter *adap, u64 *nic_rate, u64 *ofld_rate);
 void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid);
-void t4_wol_magic_enable(struct adapter *adap, unsigned int port,
- const u8 *addr);
-int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map,
- u64 mask0, u64 mask1, unsigned int crc, bool enable);
-
 int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
 enum dev_master master, enum dev_state *state);
 int t4_fw_bye(struct adapter *adap, unsigned int mbox);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
index 80c6627fe981..c80a93347a8c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
@@ -122,7 +122,6 @@ void cxgb4_dcb_version_init(struct net_device *);
 void cxgb4_dcb_reset(struct net_device *dev);
 void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input);
 void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *);
-void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *);
 extern const struct dcbnl_rtnl_ops cxgb4_dcb_ops;
 static inline __u8 bitswap_1(unsigned char val)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 3d091947ae00..7f3f5afa864f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1556,12 +1556,9 @@ static int get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *ts
 struct adapter *adapter = pi->adapter;
 ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
-
- ts_info->so_timestamping |= SOF_TIMESTAMPING_RX_HARDWARE |
- SOF_TIMESTAMPING_TX_HARDWARE |
- SOF_TIMESTAMPING_RAW_HARDWARE;
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
 ts_info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
@@ -1575,8 +1572,6 @@ static int get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *ts
 if (adapter->ptp_clock)
 ts_info->phc_index = ptp_clock_index(adapter->ptp_clock);
- else
- ts_info->phc_index = -1;
 return 0;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c
index 33b2c0c45509..f6f745f5c022 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c
@@ -81,8 +81,7 @@ int cxgb_fcoe_enable(struct net_device *netdev)
 netdev->features |= NETIF_F_FCOE_CRC;
 netdev->vlan_features |= NETIF_F_FCOE_CRC;
- netdev->features |= NETIF_F_FCOE_MTU;
- netdev->vlan_features |= NETIF_F_FCOE_MTU;
+ netdev->fcoe_mtu = true;
 netdev_features_change(netdev);
@@ -112,8 +111,7 @@ int cxgb_fcoe_disable(struct net_device *netdev)
 netdev->features &= ~NETIF_F_FCOE_CRC;
 netdev->vlan_features &= ~NETIF_F_FCOE_CRC;
- netdev->features &= ~NETIF_F_FCOE_MTU;
- netdev->vlan_features &= ~NETIF_F_FCOE_MTU;
+ netdev->fcoe_mtu = false;
 netdev_features_change(netdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 786ceae34488..dd9e68465e69 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1244,7 +1244,8 @@ static u64 hash_filter_ntuple(struct ch_filter_specification *fs,
 * in the Compressed Filter Tuple.
 */
 if (tp->vlan_shift >= 0 && fs->mask.ivlan)
- ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift;
+ ntuple |= (u64)(FT_VLAN_VLD_F |
+ fs->val.ivlan) << tp->vlan_shift;
 if (tp->port_shift >= 0 && fs->mask.iport)
 ntuple |= (u64)fs->val.iport << tp->port_shift;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
index 9050568a034c..64663112cad8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
@@ -242,7 +242,7 @@ struct cxgb4_next_header {
 * field's value to jump to next header such as IHL field
 * in IPv4 header.
 */
- struct tc_u32_sel sel;
+ struct tc_u32_sel_hdr sel;
 struct tc_u32_key key;
 /* location of jump to make */
 const struct cxgb4_match_field *jump;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index a9599ba26975..d8cafaa7ddb4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -508,7 +508,6 @@ unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
 unsigned int cxgb4_port_e2cchan(const struct net_device *dev);
 unsigned int cxgb4_port_viid(const struct net_device *dev);
-unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid);
 unsigned int cxgb4_port_idx(const struct net_device *dev);
 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
 unsigned int *idx);
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
index 854d87e1125c..2e3973a32d9d 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
@@ -342,10 +342,10 @@ int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
 }
 EXPORT_SYMBOL(cxgbi_ppm_release);
-static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
- unsigned int *pcpu_ppmax)
+static struct cxgbi_ppm_pool __percpu *
+ppm_alloc_cpu_pool(unsigned int *total, unsigned int *pcpu_ppmax)
 {
- struct cxgbi_ppm_pool *pools;
+ struct cxgbi_ppm_pool __percpu *pools;
 unsigned int ppmax = (*total) / num_possible_cpus();
 unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
 unsigned int bmap;
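The __percpu annotations added above matter because alloc_percpu() returns a per-CPU offset, not a dereferenceable pointer; sparse flags any direct access, and reads must go through per_cpu_ptr()/this_cpu_ptr(). A minimal sketch with a stand-in structure:

/* --- example (pool_stub is a hypothetical stand-in) --- */
#include <linux/percpu.h>

struct pool_stub {
	unsigned int next;
};

static struct pool_stub __percpu *pool_create(void)
{
	/* returns a per-CPU cookie, hence the __percpu annotation */
	return alloc_percpu(struct pool_stub);
}

static void pool_reset_cpu(struct pool_stub __percpu *pools, int cpu)
{
	struct pool_stub *p = per_cpu_ptr(pools, cpu);	/* real pointer */

	p->next = 0;
}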
@@ -392,7 +392,7 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, unsigned
 int iscsi_edram_size)
 {
 struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
- struct cxgbi_ppm_pool *pool = NULL;
+ struct cxgbi_ppm_pool __percpu *pool = NULL;
 unsigned int pool_index_max = 0;
 unsigned int ppmax_pool = 0;
 unsigned int ppod_bmap_size;
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 1f495cfd7959..c2007cd86416 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -16,13 +16,12 @@
 #include <linux/ethtool.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/slab.h>
-#include <linux/platform_data/eth-ep93xx.h>
-
 #define DRV_MODULE_NAME "ep93xx-eth"
 #define RX_QUEUE_ENTRIES 64
@@ -738,25 +737,6 @@ static const struct net_device_ops ep93xx_netdev_ops = {
 .ndo_set_mac_address = eth_mac_addr,
 };
-static struct net_device *ep93xx_dev_alloc(struct ep93xx_eth_data *data)
-{
- struct net_device *dev;
-
- dev = alloc_etherdev(sizeof(struct ep93xx_priv));
- if (dev == NULL)
- return NULL;
-
- eth_hw_addr_set(dev, data->dev_addr);
-
- dev->ethtool_ops = &ep93xx_ethtool_ops;
- dev->netdev_ops = &ep93xx_netdev_ops;
-
- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
-
- return dev;
-}
-
-
 static void ep93xx_eth_remove(struct platform_device *pdev)
 {
 struct net_device *dev;
@@ -786,27 +766,49 @@ static void ep93xx_eth_remove(struct platform_device *pdev)
 static int ep93xx_eth_probe(struct platform_device *pdev)
 {
- struct ep93xx_eth_data *data;
 struct net_device *dev;
 struct ep93xx_priv *ep;
 struct resource *mem;
+ void __iomem *base_addr;
+ struct device_node *np;
+ u8 addr[ETH_ALEN];
+ u32 phy_id;
 int irq;
 int err;
 if (pdev == NULL)
 return -ENODEV;
- data = dev_get_platdata(&pdev->dev);
 mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 irq = platform_get_irq(pdev, 0);
 if (!mem || irq < 0)
 return -ENXIO;
- dev = ep93xx_dev_alloc(data);
+ base_addr = ioremap(mem->start, resource_size(mem));
+ if (!base_addr)
+ return dev_err_probe(&pdev->dev, -EIO, "Failed to ioremap ethernet registers\n");
+
+ np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+ if (!np)
+ return dev_err_probe(&pdev->dev, -ENODEV, "Please provide \"phy-handle\"\n");
+
+ err = of_property_read_u32(np, "reg", &phy_id);
+ of_node_put(np);
+ if (err)
+ return dev_err_probe(&pdev->dev, -ENOENT, "Failed to locate \"phy_id\"\n");
+
+ dev = alloc_etherdev(sizeof(struct ep93xx_priv));
 if (dev == NULL) {
 err = -ENOMEM;
 goto err_out;
 }
+
+ memcpy_fromio(addr, base_addr + 0x50, ETH_ALEN);
+ eth_hw_addr_set(dev, addr);
+ dev->ethtool_ops = &ep93xx_ethtool_ops;
+ dev->netdev_ops = &ep93xx_netdev_ops;
+ dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
+
 ep = netdev_priv(dev);
 ep->dev = dev;
 SET_NETDEV_DEV(dev, &pdev->dev);
@@ -822,15 +824,10 @@ static int ep93xx_eth_probe(struct platform_device *pdev)
 goto err_out;
 }
- ep->base_addr = ioremap(mem->start, resource_size(mem));
- if (ep->base_addr == NULL) {
- dev_err(&pdev->dev, "Failed to ioremap ethernet registers\n");
- err = -EIO;
- goto err_out;
- }
+ ep->base_addr = base_addr;
 ep->irq = irq;
- ep->mii.phy_id = data->phy_id;
+ ep->mii.phy_id = phy_id;
 ep->mii.phy_id_mask = 0x1f;
 ep->mii.reg_num_mask = 0x1f;
 ep->mii.dev = dev;
@@ -857,12 +854,18 @@ err_out:
 return err;
 }
+static const struct of_device_id ep93xx_eth_of_ids[] = {
+ { .compatible = "cirrus,ep9301-eth" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ep93xx_eth_of_ids);
 static struct platform_driver ep93xx_eth_driver = {
 .probe = ep93xx_eth_probe,
 .remove_new = ep93xx_eth_remove,
 .driver = {
 .name = "ep93xx-eth",
+ .of_match_table = ep93xx_eth_of_ids,
 },
 };
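The ep93xx probe conversion above replaces platform data with a device-tree phy-handle lookup: resolve the phandle, read the PHY's "reg" address, and drop the node reference. The same steps in isolation (helper name is hypothetical; error reporting mirrors the dev_err_probe() style used in the diff):

/* --- example (hypothetical helper, standard OF APIs) --- */
#include <linux/device.h>
#include <linux/of.h>

static int lookup_phy_id(struct device *dev, u32 *phy_id)
{
	struct device_node *np;
	int err;

	np = of_parse_phandle(dev->of_node, "phy-handle", 0);
	if (!np)
		return dev_err_probe(dev, -ENODEV, "Please provide \"phy-handle\"\n");

	err = of_property_read_u32(np, "reg", phy_id);
	of_node_put(np);	/* of_parse_phandle() took a reference */
	if (err)
		return dev_err_probe(dev, -ENOENT, "Failed to locate \"phy_id\"\n");
	return 0;
}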
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 300ad05ee05b..0cc3644ee855 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -128,6 +128,40 @@ struct vxlan_offload {
 u8 flags;
 };
+struct enic_wq_stats {
+ u64 packets; /* pkts queued for Tx */
+ u64 stopped; /* Tx ring almost full, queue stopped */
+ u64 wake; /* Tx ring no longer full, queue woken up*/
+ u64 tso; /* non-encap tso pkt */
+ u64 encap_tso; /* encap tso pkt */
+ u64 encap_csum; /* encap HW csum */
+ u64 csum_partial; /* skb->ip_summed = CHECKSUM_PARTIAL */
+ u64 csum_none; /* HW csum not required */
+ u64 bytes; /* bytes queued for Tx */
+ u64 add_vlan; /* HW adds vlan tag */
+ u64 cq_work; /* Tx completions processed */
+ u64 cq_bytes; /* Tx bytes processed */
+ u64 null_pkt; /* skb length <= 0 */
+ u64 skb_linear_fail; /* linearize failures */
+ u64 desc_full_awake; /* TX ring full while queue awake */
+};
+
+struct enic_rq_stats {
+ u64 packets; /* pkts received */
+ u64 bytes; /* bytes received */
+ u64 l4_rss_hash; /* hashed on l4 */
+ u64 l3_rss_hash; /* hashed on l3 */
+ u64 csum_unnecessary; /* HW verified csum */
+ u64 csum_unnecessary_encap; /* HW verified csum on encap packet */
+ u64 vlan_stripped; /* HW stripped vlan */
+ u64 napi_complete; /* napi complete intr reenabled */
+ u64 napi_repoll; /* napi poll again */
+ u64 bad_fcs; /* bad pkts */
+ u64 pkt_truncated; /* truncated pkts */
+ u64 no_skb; /* out of skbs */
+ u64 desc_skip; /* Rx pkt went into later buffer */
+};
+
 /* Per-instance private data structure */
 struct enic {
 struct net_device *netdev;
@@ -162,16 +196,16 @@ struct enic {
 /* work queue cache line section */
 ____cacheline_aligned struct vnic_wq wq[ENIC_WQ_MAX];
 spinlock_t wq_lock[ENIC_WQ_MAX];
+ struct enic_wq_stats wq_stats[ENIC_WQ_MAX];
 unsigned int wq_count;
 u16 loop_enable;
 u16 loop_tag;
 /* receive queue cache line section */
 ____cacheline_aligned struct vnic_rq rq[ENIC_RQ_MAX];
+ struct enic_rq_stats rq_stats[ENIC_RQ_MAX];
 unsigned int rq_count;
 struct vxlan_offload vxlan;
- u64 rq_truncated_pkts;
- u64 rq_bad_fcs;
 struct napi_struct napi[ENIC_RQ_MAX + ENIC_WQ_MAX];
 /* interrupt resource cache line section */
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index f2f1055880b2..f7986f2b6a17 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -32,6 +32,41 @@ struct enic_stat {
 .index = offsetof(struct vnic_gen_stats, stat) / sizeof(u64)\
 }
+#define ENIC_PER_RQ_STAT(stat) { \
+ .name = "rq[%d]_"#stat, \
+ .index = offsetof(struct enic_rq_stats, stat) / sizeof(u64) \
+}
+
+#define ENIC_PER_WQ_STAT(stat) { \
+ .name = "wq[%d]_"#stat, \
+ .index = offsetof(struct enic_wq_stats, stat) / sizeof(u64) \
+}
+
+static const struct enic_stat enic_per_rq_stats[] = {
+ ENIC_PER_RQ_STAT(l4_rss_hash),
+ ENIC_PER_RQ_STAT(l3_rss_hash),
+ ENIC_PER_RQ_STAT(csum_unnecessary_encap),
+ ENIC_PER_RQ_STAT(vlan_stripped),
+ ENIC_PER_RQ_STAT(napi_complete),
+ ENIC_PER_RQ_STAT(napi_repoll),
+ ENIC_PER_RQ_STAT(no_skb),
+ ENIC_PER_RQ_STAT(desc_skip),
+};
+
+#define NUM_ENIC_PER_RQ_STATS ARRAY_SIZE(enic_per_rq_stats)
+
+static const struct enic_stat enic_per_wq_stats[] = {
+ ENIC_PER_WQ_STAT(encap_tso),
+ ENIC_PER_WQ_STAT(encap_csum),
+ ENIC_PER_WQ_STAT(add_vlan),
+ ENIC_PER_WQ_STAT(cq_work),
+ ENIC_PER_WQ_STAT(cq_bytes),
+ ENIC_PER_WQ_STAT(null_pkt),
+ ENIC_PER_WQ_STAT(skb_linear_fail),
+ ENIC_PER_WQ_STAT(desc_full_awake),
+};
+
+#define NUM_ENIC_PER_WQ_STATS ARRAY_SIZE(enic_per_wq_stats)
 static const struct enic_stat enic_tx_stats[] = {
 ENIC_TX_STAT(tx_frames_ok),
 ENIC_TX_STAT(tx_unicast_frames_ok),
@@ -46,6 +81,8 @@ static const struct enic_stat enic_tx_stats[] = {
 ENIC_TX_STAT(tx_tso),
 };
+#define NUM_ENIC_TX_STATS ARRAY_SIZE(enic_tx_stats)
+
 static const struct enic_stat enic_rx_stats[] = {
 ENIC_RX_STAT(rx_frames_ok),
 ENIC_RX_STAT(rx_frames_total),
@@ -70,13 +107,13 @@ static const struct enic_stat enic_rx_stats[] = {
 ENIC_RX_STAT(rx_frames_to_max),
 };
+#define NUM_ENIC_RX_STATS ARRAY_SIZE(enic_rx_stats)
+
 static const struct enic_stat enic_gen_stats[] = {
 ENIC_GEN_STAT(dma_map_error),
 };
-static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
-static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
-static const unsigned int enic_n_gen_stats = ARRAY_SIZE(enic_gen_stats);
+#define NUM_ENIC_GEN_STATS ARRAY_SIZE(enic_gen_stats)
 static void enic_intr_coal_set_rx(struct enic *enic, u32 timer)
 {
@@ -141,22 +178,38 @@ static void enic_get_drvinfo(struct net_device *netdev,
 static void enic_get_strings(struct net_device *netdev, u32 stringset,
 u8 *data)
 {
+ struct enic *enic = netdev_priv(netdev);
 unsigned int i;
+ unsigned int j;
 switch (stringset) {
 case ETH_SS_STATS:
- for (i = 0; i < enic_n_tx_stats; i++) {
+ for (i = 0; i < NUM_ENIC_TX_STATS; i++) {
 memcpy(data, enic_tx_stats[i].name, ETH_GSTRING_LEN);
 data += ETH_GSTRING_LEN;
 }
- for (i = 0; i < enic_n_rx_stats; i++) {
+ for (i = 0; i < NUM_ENIC_RX_STATS; i++) {
 memcpy(data, enic_rx_stats[i].name, ETH_GSTRING_LEN);
 data += ETH_GSTRING_LEN;
 }
- for (i = 0; i < enic_n_gen_stats; i++) {
+ for (i = 0; i < NUM_ENIC_GEN_STATS; i++) {
 memcpy(data, enic_gen_stats[i].name, ETH_GSTRING_LEN);
 data += ETH_GSTRING_LEN;
 }
+ for (i = 0; i < enic->rq_count; i++) {
+ for (j = 0; j < NUM_ENIC_PER_RQ_STATS; j++) {
+ snprintf(data, ETH_GSTRING_LEN,
+ enic_per_rq_stats[j].name, i);
+ data += ETH_GSTRING_LEN;
+ }
+ }
+ for (i = 0; i < enic->wq_count; i++) {
+ for (j = 0; j < NUM_ENIC_PER_WQ_STATS; j++) {
+ snprintf(data, ETH_GSTRING_LEN,
+ enic_per_wq_stats[j].name, i);
+ data += ETH_GSTRING_LEN;
+ }
+ }
 break;
 }
 }
@@ -242,9 +295,19 @@ err_out:
 static int enic_get_sset_count(struct net_device *netdev, int sset)
 {
+ struct enic *enic = netdev_priv(netdev);
+ unsigned int n_per_rq_stats;
+ unsigned int n_per_wq_stats;
+ unsigned int n_stats;
+
 switch (sset) {
 case ETH_SS_STATS:
- return enic_n_tx_stats + enic_n_rx_stats + enic_n_gen_stats;
+ n_per_rq_stats = NUM_ENIC_PER_RQ_STATS * enic->rq_count;
+ n_per_wq_stats = NUM_ENIC_PER_WQ_STATS * enic->wq_count;
+ n_stats = NUM_ENIC_TX_STATS + NUM_ENIC_RX_STATS +
+ NUM_ENIC_GEN_STATS +
+ n_per_rq_stats + n_per_wq_stats;
+ return n_stats;
 default:
 return -EOPNOTSUPP;
 }
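The per-queue scheme above uses each table entry's .name as a printf template ("rq[%d]_..."), expanded once per queue, so enic_get_sset_count() must return fixed_stats + per_queue_stats * n_queues to stay in lockstep with what enic_get_strings() emits. The expansion loop in isolation (fill_queue_strings is a hypothetical helper):

/* --- example (sketch of the templated string expansion) --- */
#include <linux/ethtool.h>
#include <linux/kernel.h>

static void fill_queue_strings(u8 *data, const char *tmpl, int n_queues)
{
	int q;

	for (q = 0; q < n_queues; q++) {
		/* tmpl is e.g. "rq[%d]_no_skb"; one string per queue */
		snprintf(data, ETH_GSTRING_LEN, tmpl, q);
		data += ETH_GSTRING_LEN;
	}
}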
@@ -256,6 +319,7 @@ static void enic_get_ethtool_stats(struct net_device *netdev,
 struct enic *enic = netdev_priv(netdev);
 struct vnic_stats *vstats;
 unsigned int i;
+ unsigned int j;
 int err;
 err = enic_dev_stats_dump(enic, &vstats);
@@ -266,12 +330,30 @@ static void enic_get_ethtool_stats(struct net_device *netdev,
 if (err == -ENOMEM)
 return;
- for (i = 0; i < enic_n_tx_stats; i++)
+ for (i = 0; i < NUM_ENIC_TX_STATS; i++)
 *(data++) = ((u64 *)&vstats->tx)[enic_tx_stats[i].index];
- for (i = 0; i < enic_n_rx_stats; i++)
+ for (i = 0; i < NUM_ENIC_RX_STATS; i++)
 *(data++) = ((u64 *)&vstats->rx)[enic_rx_stats[i].index];
- for (i = 0; i < enic_n_gen_stats; i++)
+ for (i = 0; i < NUM_ENIC_GEN_STATS; i++)
 *(data++) = ((u64 *)&enic->gen_stats)[enic_gen_stats[i].index];
+ for (i = 0; i < enic->rq_count; i++) {
+ struct enic_rq_stats *rqstats = &enic->rq_stats[i];
+ int index;
+
+ for (j = 0; j < NUM_ENIC_PER_RQ_STATS; j++) {
+ index = enic_per_rq_stats[j].index;
+ *(data++) = ((u64 *)rqstats)[index];
+ }
+ }
+ for (i = 0; i < enic->wq_count; i++) {
+ struct enic_wq_stats *wqstats = &enic->wq_stats[i];
+ int index;
+
+ for (j = 0; j < NUM_ENIC_PER_WQ_STATS; j++) {
+ index = enic_per_wq_stats[j].index;
+ *(data++) = ((u64 *)wqstats)[index];
+ }
+ }
 }
 static u32 enic_get_msglevel(struct net_device *netdev)
@@ -601,9 +683,7 @@ static int enic_set_rxfh(struct net_device *netdev,
 static int enic_get_ts_info(struct net_device *netdev,
 struct kernel_ethtool_ts_info *info)
 {
- info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
+ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE;
 return 0;
 }
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 5f26fc3ad655..ffed14b63d41 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -46,6 +46,7 @@
 #include <linux/crash_dump.h>
 #include <net/busy_poll.h>
 #include <net/vxlan.h>
+#include <net/netdev_queues.h>
 #include "cq_enet_desc.h"
 #include "vnic_dev.h"
@@ -339,6 +340,10 @@ static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
 static void enic_wq_free_buf(struct vnic_wq *wq,
 struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque)
 {
+ struct enic *enic = vnic_dev_priv(wq->vdev);
+
+ enic->wq_stats[wq->index].cq_work++;
+ enic->wq_stats[wq->index].cq_bytes += buf->len;
 enic_free_wq_buf(wq, buf);
 }
@@ -355,8 +360,10 @@ static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
 if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) &&
 vnic_wq_desc_avail(&enic->wq[q_number]) >=
- (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS))
+ (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)) {
 netif_wake_subqueue(enic->netdev, q_number);
+ enic->wq_stats[q_number].wake++;
+ }
 spin_unlock(&enic->wq_lock[q_number]);
@@ -590,6 +597,11 @@ static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq,
 if (!eop)
 err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+ /* The enic_queue_wq_desc() above does not do HW checksum */
+ enic->wq_stats[wq->index].csum_none++;
+ enic->wq_stats[wq->index].packets++;
+ enic->wq_stats[wq->index].bytes += skb->len;
+
 return err;
 }
@@ -622,6 +634,10 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
 if (!eop)
 err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+ enic->wq_stats[wq->index].csum_partial++;
+ enic->wq_stats[wq->index].packets++;
+ enic->wq_stats[wq->index].bytes += skb->len;
+
 return err;
 }
@@ -676,15 +692,18 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
 unsigned int offset = 0;
 unsigned int hdr_len;
 dma_addr_t dma_addr;
+ unsigned int pkts;
 unsigned int len;
 skb_frag_t *frag;
 if (skb->encapsulation) {
 hdr_len = skb_inner_tcp_all_headers(skb);
 enic_preload_tcp_csum_encap(skb);
+ enic->wq_stats[wq->index].encap_tso++;
 } else {
 hdr_len = skb_tcp_all_headers(skb);
 enic_preload_tcp_csum(skb);
+ enic->wq_stats[wq->index].tso++;
 }
 /* Queue WQ_ENET_MAX_DESC_LEN length descriptors
@@ -705,7 +724,7 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
 }
 if (eop)
- return 0;
+ goto tso_out_stats;
 /* Queue WQ_ENET_MAX_DESC_LEN length descriptors
 * for additional data fragments
@@ -732,6 +751,15 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
 }
 }
+tso_out_stats:
+ /* calculate how many packets tso sent */
+ len = skb->len - hdr_len;
+ pkts = len / mss;
+ if ((len % mss) > 0)
+ pkts++;
+ enic->wq_stats[wq->index].packets += pkts;
+ enic->wq_stats[wq->index].bytes += (len + (pkts * hdr_len));
+
 return 0;
 }
@@ -764,6 +792,10 @@ static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
 if (!eop)
 err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+ enic->wq_stats[wq->index].encap_csum++;
+ enic->wq_stats[wq->index].packets++;
+ enic->wq_stats[wq->index].bytes += skb->len;
+
 return err;
 }
@@ -780,6 +812,7 @@ static inline int enic_queue_wq_skb(struct enic *enic,
 /* VLAN tag from trunking driver */
 vlan_tag_insert = 1;
 vlan_tag = skb_vlan_tag_get(skb);
+ enic->wq_stats[wq->index].add_vlan++;
 } else if (enic->loop_enable) {
 vlan_tag = enic->loop_tag;
 loopback = 1;
@@ -792,7 +825,7 @@ static inline int enic_queue_wq_skb(struct enic *enic,
 else if (skb->encapsulation)
 err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert,
 vlan_tag, loopback);
- else if (skb->ip_summed == CHECKSUM_PARTIAL)
+ else if (skb->ip_summed == CHECKSUM_PARTIAL)
 err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert,
 vlan_tag, loopback);
 else
@@ -825,13 +858,15 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
 unsigned int txq_map;
 struct netdev_queue *txq;
+ txq_map = skb_get_queue_mapping(skb) % enic->wq_count;
+ wq = &enic->wq[txq_map];
+
 if (skb->len <= 0) {
 dev_kfree_skb_any(skb);
+ enic->wq_stats[wq->index].null_pkt++;
 return NETDEV_TX_OK;
 }
- txq_map = skb_get_queue_mapping(skb) % enic->wq_count;
- wq = &enic->wq[txq_map];
 txq = netdev_get_tx_queue(netdev, txq_map);
 /* Non-TSO sends must fit within ENIC_NON_TSO_MAX_DESC descs,
@@ -843,6 +878,7 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
 skb_shinfo(skb)->nr_frags + 1 > ENIC_NON_TSO_MAX_DESC &&
 skb_linearize(skb)) {
 dev_kfree_skb_any(skb);
+ enic->wq_stats[wq->index].skb_linear_fail++;
 return NETDEV_TX_OK;
 }
Tx ring full when queue awake!\n"); spin_unlock(&enic->wq_lock[txq_map]); + enic->wq_stats[wq->index].desc_full_awake++; return NETDEV_TX_BUSY; } if (enic_queue_wq_skb(enic, wq, skb)) goto error; - if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) + if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) { netif_tx_stop_queue(txq); + enic->wq_stats[wq->index].stopped++; + } skb_tx_timestamp(skb); if (!netdev_xmit_more() || netif_xmit_stopped(txq)) vnic_wq_doorbell(wq); @@ -878,7 +917,10 @@ static void enic_get_stats(struct net_device *netdev, { struct enic *enic = netdev_priv(netdev); struct vnic_stats *stats; + u64 pkt_truncated = 0; + u64 bad_fcs = 0; int err; + int i; err = enic_dev_stats_dump(enic, &stats); /* return only when dma_alloc_coherent fails in vnic_dev_stats_dump @@ -897,8 +939,17 @@ static void enic_get_stats(struct net_device *netdev, net_stats->rx_bytes = stats->rx.rx_bytes_ok; net_stats->rx_errors = stats->rx.rx_errors; net_stats->multicast = stats->rx.rx_multicast_frames_ok; + + for (i = 0; i < ENIC_RQ_MAX; i++) { + struct enic_rq_stats *rqs = &enic->rq_stats[i]; + + if (!enic->rq[i].ctrl) + break; + pkt_truncated += rqs->pkt_truncated; + bad_fcs += rqs->bad_fcs; + } - net_stats->rx_over_errors = enic->rq_truncated_pkts; - net_stats->rx_crc_errors = enic->rq_bad_fcs; + net_stats->rx_over_errors = pkt_truncated; + net_stats->rx_crc_errors = bad_fcs; net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop; } @@ -1261,8 +1312,10 @@ static int enic_rq_alloc_buf(struct vnic_rq *rq) return 0; } skb = netdev_alloc_skb_ip_align(netdev, len); - if (!skb) + if (!skb) { + enic->rq_stats[rq->index].no_skb++; return -ENOMEM; + } dma_addr = dma_map_single(&enic->pdev->dev, skb->data, len, DMA_FROM_DEVICE); @@ -1313,6 +1366,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, struct net_device *netdev = enic->netdev; struct sk_buff *skb; struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)]; + struct enic_rq_stats *rqstats = &enic->rq_stats[rq->index]; u8 type, color, eop, sop, ingress_port, vlan_stripped; u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof; @@ -1323,8 +1377,11 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, u32 rss_hash; bool outer_csum_ok = true, encap = false; - if (skipped) + rqstats->packets++; + if (skipped) { + rqstats->desc_skip++; return; + } skb = buf->os_buf; @@ -1342,9 +1399,9 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, if (!fcs_ok) { if (bytes_written > 0) - enic->rq_bad_fcs++; + rqstats->bad_fcs++; else if (bytes_written == 0) - enic->rq_truncated_pkts++; + rqstats->pkt_truncated++; } dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, @@ -1359,7 +1416,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, /* Good receive */ - + rqstats->bytes += bytes_written; if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) { buf->os_buf = NULL; dma_unmap_single(&enic->pdev->dev, buf->dma_addr, @@ -1377,11 +1434,13 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX: skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4); + rqstats->l4_rss_hash++; break; case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4: case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6: case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX: skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3); + rqstats->l3_rss_hash++; break; } } @@ -1418,11 +1477,16 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, (ipv4_csum_ok || ipv6)) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = 
encap; + if (encap) + rqstats->csum_unnecessary_encap++; + else + rqstats->csum_unnecessary++; } - if (vlan_stripped) + if (vlan_stripped) { __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); - + rqstats->vlan_stripped++; + } skb_mark_napi_id(skb, &enic->napi[rq->index]); if (!(netdev->features & NETIF_F_GRO)) netif_receive_skb(skb); @@ -1435,7 +1499,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, /* Buffer overflow */ - + rqstats->pkt_truncated++; dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); @@ -1568,6 +1632,9 @@ static int enic_poll(struct napi_struct *napi, int budget) if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) enic_set_int_moderation(enic, &enic->rq[0]); vnic_intr_unmask(&enic->intr[intr]); + enic->rq_stats[0].napi_complete++; + } else { + enic->rq_stats[0].napi_repoll++; } return rq_work_done; @@ -1693,6 +1760,9 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) enic_set_int_moderation(enic, &enic->rq[rq]); vnic_intr_unmask(&enic->intr[intr]); + enic->rq_stats[rq].napi_complete++; + } else { + enic->rq_stats[rq].napi_repoll++; } return work_done; @@ -2502,6 +2572,54 @@ static void enic_clear_intr_mode(struct enic *enic) vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); } +static void enic_get_queue_stats_rx(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rxs) +{ + struct enic *enic = netdev_priv(dev); + struct enic_rq_stats *rqstats = &enic->rq_stats[idx]; + + rxs->bytes = rqstats->bytes; + rxs->packets = rqstats->packets; + rxs->hw_drops = rqstats->bad_fcs + rqstats->pkt_truncated; + rxs->hw_drop_overruns = rqstats->pkt_truncated; + rxs->csum_unnecessary = rqstats->csum_unnecessary + + rqstats->csum_unnecessary_encap; +} + +static void enic_get_queue_stats_tx(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *txs) +{ + struct enic *enic = netdev_priv(dev); + struct enic_wq_stats *wqstats = &enic->wq_stats[idx]; + + txs->bytes = wqstats->bytes; + txs->packets = wqstats->packets; + txs->csum_none = wqstats->csum_none; + txs->needs_csum = wqstats->csum_partial + wqstats->encap_csum + + wqstats->tso; + txs->hw_gso_packets = wqstats->tso; + txs->stop = wqstats->stopped; + txs->wake = wqstats->wake; +} + +static void enic_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rxs, + struct netdev_queue_stats_tx *txs) +{ + rxs->bytes = 0; + rxs->packets = 0; + rxs->hw_drops = 0; + rxs->hw_drop_overruns = 0; + rxs->csum_unnecessary = 0; + txs->bytes = 0; + txs->packets = 0; + txs->csum_none = 0; + txs->needs_csum = 0; + txs->hw_gso_packets = 0; + txs->stop = 0; + txs->wake = 0; +} + static const struct net_device_ops enic_netdev_dynamic_ops = { .ndo_open = enic_open, .ndo_stop = enic_stop, @@ -2550,6 +2668,12 @@ static const struct net_device_ops enic_netdev_ops = { .ndo_features_check = enic_features_check, }; +static const struct netdev_stat_ops enic_netdev_stat_ops = { + .get_queue_stats_rx = enic_get_queue_stats_rx, + .get_queue_stats_tx = enic_get_queue_stats_tx, + .get_base_stats = enic_get_base_stats, +}; + static void enic_dev_deinit(struct enic *enic) { unsigned int i; @@ -2892,6 +3016,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &enic_netdev_dynamic_ops; else netdev->netdev_ops = &enic_netdev_ops; + netdev->stat_ops = &enic_netdev_stat_ops; netdev->watchdog_timeo = 2 * HZ; enic_set_ethtool_ops(netdev); 
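The per-queue ethtool plumbing above hinges on each enic_per_rq_stats/enic_per_wq_stats entry pairing a printf-style name (carrying a queue-index placeholder for the snprintf() calls in enic_get_strings()) with an offset into the per-queue stats struct, expressed in u64 units. The ENIC_PER_RQ_STAT/ENIC_PER_WQ_STAT macro definitions sit above this excerpt; the sketch below reconstructs that pattern, with the struct fields and name format assumed for illustration rather than taken from the driver headers.

/* Minimal sketch of the indexed ethtool stat table; the "sketch_"
 * names are invented for illustration, not the driver's definitions.
 */
#include <linux/ethtool.h>
#include <linux/stddef.h>
#include <linux/types.h>

struct sketch_rq_stats {
	u64 packets;		/* every field is a u64 ... */
	u64 bytes;		/* ... so index can address the struct */
};

struct sketch_stat {
	char name[ETH_GSTRING_LEN];	/* printf format, e.g. "rq[%d]_packets" */
	unsigned int index;		/* u64-sized offset into the stats struct */
};

#define SKETCH_PER_RQ_STAT(stat) { \
	.name = "rq[%d]_" #stat, \
	.index = offsetof(struct sketch_rq_stats, stat) / sizeof(u64), \
}

static const struct sketch_stat sketch_per_rq_stats[] = {
	SKETCH_PER_RQ_STAT(packets),
	SKETCH_PER_RQ_STAT(bytes),
};

With that layout, enic_get_strings() stamps the queue number into each name, enic_get_ethtool_stats() reads ((u64 *)rqstats)[index] per queue, and enic_get_sset_count() scales the string count by rq_count and wq_count, keeping the three callbacks consistent.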
diff --git a/drivers/net/ethernet/davicom/dm9051.c b/drivers/net/ethernet/davicom/dm9051.c index bcfe52c11804..59ea48d4c9de 100644 --- a/drivers/net/ethernet/davicom/dm9051.c +++ b/drivers/net/ethernet/davicom/dm9051.c @@ -1235,6 +1235,7 @@ static const struct of_device_id dm9051_match_table[] = { { .compatible = "davicom,dm9051" }, {} }; +MODULE_DEVICE_TABLE(of, dm9051_match_table); static const struct spi_device_id dm9051_id_table[] = { { "dm9051", 0 }, diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 7bfeae04b52b..d0ea92607870 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -1842,7 +1842,7 @@ static int rio_resume(struct device *device) return 0; } -static SIMPLE_DEV_PM_OPS(rio_pm_ops, rio_suspend, rio_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(rio_pm_ops, rio_suspend, rio_resume); #define RIO_PM_OPS (&rio_pm_ops) #else diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 61fe9625bed1..e48b861e4ce1 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -966,9 +966,7 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, void be_link_status_update(struct be_adapter *adapter, u8 link_status); void be_parse_stats(struct be_adapter *adapter); int be_load_fw(struct be_adapter *adapter, u8 *func); -bool be_is_wol_supported(struct be_adapter *adapter); bool be_pause_supported(struct be_adapter *adapter); -u32 be_get_fw_log_level(struct be_adapter *adapter); int be_update_queues(struct be_adapter *adapter); int be_poll(struct napi_struct *napi, int budget); void be_eqd_update(struct be_adapter *adapter, bool force_update); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index e2085c68c0ee..d70818f06be7 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2381,7 +2381,6 @@ struct be_cmd_req_manage_iface_filters { } __packed; u16 be_POST_stage_get(struct be_adapter *adapter); -int be_pci_fnum_get(struct be_adapter *adapter); int be_fw_wait_ready(struct be_adapter *adapter); int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, bool permanent, u32 if_handle, u32 pmac_id); @@ -2406,7 +2405,6 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q, int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q); int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, u8 *link_status, u32 dom); -int be_cmd_reset(struct be_adapter *adapter); int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd); int lancer_cmd_get_pport_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd); @@ -2488,7 +2486,6 @@ int lancer_physdev_ctrl(struct be_adapter *adapter, u32 mask); int lancer_initiate_dump(struct be_adapter *adapter); int lancer_delete_dump(struct be_adapter *adapter); bool dump_present(struct be_adapter *adapter); -int lancer_test_and_set_rdy_state(struct be_adapter *adapter); int be_cmd_query_port_name(struct be_adapter *adapter); int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res); diff --git a/drivers/net/ethernet/engleder/tsnep_ethtool.c b/drivers/net/ethernet/engleder/tsnep_ethtool.c index 9aa286ba1f00..228a638eae16 100644 --- a/drivers/net/ethernet/engleder/tsnep_ethtool.c +++ b/drivers/net/ethernet/engleder/tsnep_ethtool.c @@ -310,16 +310,12 @@ static int 
tsnep_ethtool_get_ts_info(struct net_device *netdev, struct tsnep_adapter *adapter = netdev_priv(netdev); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (adapter->ptp_clock) info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index fddfd1dd5070..f3cc14cc757d 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -24,6 +24,7 @@ #include <linux/crc32.h> #include <linux/if_vlan.h> #include <linux/of_net.h> +#include <linux/phy_fixed.h> #include <net/ip.h> #include <net/ncsi.h> @@ -50,6 +51,15 @@ #define FTGMAC_100MHZ 100000000 #define FTGMAC_25MHZ 25000000 +/* For NC-SI to register a fixed-link phy device */ +static struct fixed_phy_status ncsi_phy_status = { + .link = 1, + .speed = SPEED_100, + .duplex = DUPLEX_FULL, + .pause = 0, + .asym_pause = 0 +}; + struct ftgmac100 { /* Registers */ struct resource *res; @@ -572,7 +582,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) (*processed)++; return true; - drop: +drop: /* Clean rxdes0 (which resets own bit) */ rxdes->rxdes0 = cpu_to_le32(status & priv->rxdes0_edorr_mask); priv->rx_pointer = ftgmac100_next_rx_pointer(priv, pointer); @@ -656,6 +666,11 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv) ftgmac100_free_tx_packet(priv, pointer, skb, txdes, ctl_stat); txdes->txdes0 = cpu_to_le32(ctl_stat & priv->txdes0_edotr_mask); + /* Ensure the descriptor config is visible before setting the tx + * pointer. + */ + smp_wmb(); + priv->tx_clean_pointer = ftgmac100_next_tx_pointer(priv, pointer); return true; @@ -809,6 +824,11 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, dma_wmb(); first->txdes0 = cpu_to_le32(f_ctl_stat); + /* Ensure the descriptor config is visible before setting the tx + * pointer. + */ + smp_wmb(); + /* Update next TX pointer */ priv->tx_pointer = pointer; @@ -829,7 +849,7 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; - dma_err: +dma_err: if (net_ratelimit()) netdev_err(netdev, "map tx fragment failed\n"); @@ -851,7 +871,7 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, * last fragment, so we know ftgmac100_free_tx_packet() * hasn't freed the skb yet. 
*/ - drop: +drop: /* Drop the packet */ dev_kfree_skb_any(skb); netdev->stats.tx_dropped++; @@ -1344,7 +1364,7 @@ static void ftgmac100_reset(struct ftgmac100 *priv) ftgmac100_init_all(priv, true); netdev_dbg(netdev, "Reset done !\n"); - bail: +bail: if (priv->mii_bus) mutex_unlock(&priv->mii_bus->mdio_lock); if (netdev->phydev) @@ -1531,7 +1551,8 @@ static int ftgmac100_open(struct net_device *netdev) if (netdev->phydev) { /* If we have a PHY, start polling */ phy_start(netdev->phydev); - } else if (priv->use_ncsi) { + } + if (priv->use_ncsi) { /* If using NC-SI, set our carrier on and start the stack */ netif_carrier_on(netdev); @@ -1543,15 +1564,16 @@ static int ftgmac100_open(struct net_device *netdev) return 0; - err_ncsi: +err_ncsi: + phy_stop(netdev->phydev); napi_disable(&priv->napi); netif_stop_queue(netdev); - err_alloc: +err_alloc: ftgmac100_free_buffers(priv); free_irq(netdev->irq, netdev); - err_irq: +err_irq: netif_napi_del(&priv->napi); - err_hw: +err_hw: iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); ftgmac100_free_rings(priv); return err; @@ -1577,7 +1599,7 @@ static int ftgmac100_stop(struct net_device *netdev) netif_napi_del(&priv->napi); if (netdev->phydev) phy_stop(netdev->phydev); - else if (priv->use_ncsi) + if (priv->use_ncsi) ncsi_stop_dev(priv->ndev); ftgmac100_stop_hw(priv); @@ -1715,6 +1737,9 @@ static void ftgmac100_phy_disconnect(struct net_device *netdev) phy_disconnect(netdev->phydev); if (of_phy_is_fixed_link(priv->dev->of_node)) of_phy_deregister_fixed_link(priv->dev->of_node); + + if (priv->use_ncsi) + fixed_phy_unregister(netdev->phydev); } static void ftgmac100_destroy_mdio(struct net_device *netdev) @@ -1792,6 +1817,7 @@ static int ftgmac100_probe(struct platform_device *pdev) struct resource *res; int irq; struct net_device *netdev; + struct phy_device *phydev; struct ftgmac100 *priv; struct device_node *np; int err = 0; @@ -1879,6 +1905,14 @@ static int ftgmac100_probe(struct platform_device *pdev) err = -EINVAL; goto err_phy_connect; } + + phydev = fixed_phy_register(PHY_POLL, &ncsi_phy_status, NULL); + err = phy_connect_direct(netdev, phydev, ftgmac100_adjust_link, + PHY_INTERFACE_MODE_MII); + if (err) { + dev_err(&pdev->dev, "Connecting PHY failed\n"); + goto err_phy_connect; + } } else if (np && of_phy_is_fixed_link(np)) { struct phy_device *phy; diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 63b3e02fab16..4968f6f0bdbc 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -84,7 +84,7 @@ FTGMAC100_INT_RPKT_BUF) /* All the interrupts we care about */ -#define FTGMAC100_INT_ALL (FTGMAC100_INT_RPKT_BUF | \ +#define FTGMAC100_INT_ALL (FTGMAC100_INT_RXTX | \ FTGMAC100_INT_BAD) /* diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index cfe6b57b1da0..e15dd3d858df 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -229,7 +229,7 @@ static int dpaa_netdev_init(struct net_device *net_dev, net_dev->max_mtu = dpaa_get_max_mtu(); net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_LLTX | NETIF_F_RXHASH); + NETIF_F_RXHASH); net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA; /* The kernels enables GSO automatically, if we declare NETIF_F_SG. 
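The dpaa hunk above removes NETIF_F_LLTX from the feature mask, and the hunk that follows sets the new net_device::lltx flag instead; dpaa2-eth below gets the same conversion. The series replaces the NETIF_F_LLTX feature bit with a plain boolean on struct net_device, which frees a feature bit and keeps a software-only property out of the user-visible feature set. A minimal sketch of the pattern, using a hypothetical "foo" driver:

/* LLTX conversion sketch for a hypothetical "foo" driver. With lltx
 * set, the core skips taking the xmit lock on transmit, so the driver
 * stays responsible for its own TX serialization -- exactly as it was
 * under NETIF_F_LLTX.
 */
#include <linux/netdevice.h>

static void foo_netdev_setup(struct net_device *ndev)
{
	/* before: ndev->features |= NETIF_F_LLTX; */
	ndev->lltx = true;
}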
@@ -239,6 +239,7 @@ static int dpaa_netdev_init(struct net_device *net_dev, net_dev->features |= NETIF_F_RXCSUM; net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + net_dev->lltx = true; /* we do not want shared skbs on TX */ net_dev->priv_flags &= ~IFF_TX_SKB_SHARING; @@ -2272,12 +2273,12 @@ static netdev_tx_t dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { const int queue_mapping = skb_get_queue_mapping(skb); - bool nonlinear = skb_is_nonlinear(skb); struct rtnl_link_stats64 *percpu_stats; struct dpaa_percpu_priv *percpu_priv; struct netdev_queue *txq; struct dpaa_priv *priv; struct qm_fd fd; + bool nonlinear; int offset = 0; int err = 0; @@ -2287,6 +2288,13 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) qm_fd_clear_fd(&fd); + /* Packet data is always read as 32-bit words, so zero out any part of + * the skb which might be sent if we have to pad the packet + */ + if (__skb_put_padto(skb, ETH_ZLEN, false)) + goto enomem; + + nonlinear = skb_is_nonlinear(skb); if (!nonlinear) { /* We're going to store the skb backpointer at the beginning * of the data buffer, so we need a privately owned skb @@ -3156,8 +3164,9 @@ static void dpaa_napi_del(struct net_device *net_dev) for_each_possible_cpu(cpu) { percpu_priv = per_cpu_ptr(priv->percpu_priv, cpu); - netif_napi_del(&percpu_priv->np.napi); + __netif_napi_del(&percpu_priv->np.napi); } + synchronize_net(); } static inline void dpaa_bp_free_pf(const struct dpaa_bp *bp, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 6866807973da..29886a8ba73f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4594,12 +4594,13 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) net_dev->priv_flags |= supported; net_dev->priv_flags &= ~not_supported; + net_dev->lltx = true; /* Features */ net_dev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; + NETIF_F_HW_TC | NETIF_F_TSO; net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a71f848adc05..a293b08f36d4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2638,13 +2638,14 @@ static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw) static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw) { - int *count, i; + int *count, ret, i; for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) { + ret = dpaa2_switch_add_bufs(ethsw, ethsw->bpid); count = &ethsw->buf_count; - *count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid); + *count += ret; - if (unlikely(*count < BUFS_PER_CMD)) + if (unlikely(ret < BUFS_PER_CMD)) return -ENOMEM; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 5c45f42232d3..032d8eadd003 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -977,7 +977,6 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt) return j; } -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK static void enetc_get_rx_tstamp(struct net_device *ndev, union enetc_rx_bd *rxbd, struct sk_buff *skb) @@ -1001,7 +1000,6 @@ static void 
enetc_get_rx_tstamp(struct net_device *ndev, shhwtstamps->hwtstamp = ns_to_ktime(tstamp); } } -#endif static void enetc_get_offloads(struct enetc_bdr *rx_ring, union enetc_rx_bd *rxbd, struct sk_buff *skb) @@ -1041,10 +1039,9 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, __vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt)); } -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK - if (priv->active_offloads & ENETC_F_RX_TSTAMP) + if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && + (priv->active_offloads & ENETC_F_RX_TSTAMP)) enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb); -#endif } /* This gets called during the non-XDP NAPI poll cycle as well as on XDP_PASS, @@ -2305,12 +2302,11 @@ static int enetc_setup_irqs(struct enetc_ndev_priv *priv) snprintf(v->name, sizeof(v->name), "%s-rxtx%d", priv->ndev->name, i); - err = request_irq(irq, enetc_msix, 0, v->name, v); + err = request_irq(irq, enetc_msix, IRQF_NO_AUTOEN, v->name, v); if (err) { dev_err(priv->dev, "request_irq() failed!\n"); goto irq_err; } - disable_irq(irq); v->tbier_base = hw->reg + ENETC_BDR(TX, 0, ENETC_TBIER); v->rbier = hw->reg + ENETC_BDR(RX, i, ENETC_RBIER); @@ -2882,7 +2878,6 @@ void enetc_set_features(struct net_device *ndev, netdev_features_t features) } EXPORT_SYMBOL_GPL(enetc_set_features); -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -2951,17 +2946,17 @@ static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr) return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; } -#endif int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) { struct enetc_ndev_priv *priv = netdev_priv(ndev); -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK - if (cmd == SIOCSHWTSTAMP) - return enetc_hwtstamp_set(ndev, rq); - if (cmd == SIOCGHWTSTAMP) - return enetc_hwtstamp_get(ndev, rq); -#endif + + if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) { + if (cmd == SIOCSHWTSTAMP) + return enetc_hwtstamp_set(ndev, rq); + if (cmd == SIOCGHWTSTAMP) + return enetc_hwtstamp_get(ndev, rq); + } if (!priv->phylink) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index a9c2ff22431c..97524dfa234c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -184,10 +184,9 @@ static inline union enetc_rx_bd *enetc_rxbd(struct enetc_bdr *rx_ring, int i) { int hw_idx = i; -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK - if (rx_ring->ext_en) + if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en) hw_idx = 2 * i; -#endif + return &(((union enetc_rx_bd *)rx_ring->bd_base)[hw_idx]); } @@ -199,10 +198,8 @@ static inline void enetc_rxbd_next(struct enetc_bdr *rx_ring, new_rxbd++; -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK - if (rx_ring->ext_en) + if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en) new_rxbd++; -#endif if (unlikely(++new_index == rx_ring->bd_count)) { new_rxbd = rx_ring->bd_base; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 5e684b23c5f5..2563eb8ac7b6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -849,28 +849,26 @@ static int enetc_get_ts_info(struct net_device *ndev, if (phc_idx) { info->phc_index = *phc_idx; symbol_put(enetc_phc_index); - } else { - info->phc_index = -1; } -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK + if 
(!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) { + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; + + return 0; + } + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE | - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + SOF_TIMESTAMPING_TX_SOFTWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON) | (1 << HWTSTAMP_TX_ONESTEP_SYNC); + info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); -#else - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; -#endif + return 0; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a923cb95cdc6..acbb627d51bf 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2775,15 +2775,11 @@ static int fec_enet_get_ts_info(struct net_device *ndev, if (fep->bufdesc_ex) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (fep->ptp_clock) info->phc_index = ptp_clock_index(fep->ptp_clock); - else - info->phc_index = -1; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); @@ -4606,7 +4602,7 @@ fec_drv_remove(struct platform_device *pdev) free_netdev(ndev); } -static int __maybe_unused fec_suspend(struct device *dev) +static int fec_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); @@ -4659,7 +4655,7 @@ static int __maybe_unused fec_suspend(struct device *dev) return 0; } -static int __maybe_unused fec_resume(struct device *dev) +static int fec_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); @@ -4714,7 +4710,7 @@ failed_clk: return ret; } -static int __maybe_unused fec_runtime_suspend(struct device *dev) +static int fec_runtime_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); @@ -4725,7 +4721,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev) return 0; } -static int __maybe_unused fec_runtime_resume(struct device *dev) +static int fec_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); @@ -4746,14 +4742,14 @@ failed_clk_ipg: } static const struct dev_pm_ops fec_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) - SET_RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) + RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) }; static struct platform_driver fec_driver = { .driver = { .name = DRIVER_NAME, - .pm = &fec_pm_ops, + .pm = pm_ptr(&fec_pm_ops), .of_match_table = fec_dt_ids, .suppress_bind_attrs = true, }, diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index e32f6724f568..4cffda363a14 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -91,6 +91,30 @@ #define FEC_PTP_MAX_NSEC_COUNTER 0x80000000ULL /** + * fec_ptp_read - read raw cycle counter (to be used by time counter) + * @cc: the cyclecounter structure + * + * this function reads the cyclecounter registers and 
is called by the + * cyclecounter structure used to construct a ns counter from the + * arbitrary fixed point registers + */ +static u64 fec_ptp_read(const struct cyclecounter *cc) +{ + struct fec_enet_private *fep = + container_of(cc, struct fec_enet_private, cc); + u32 tempval; + + tempval = readl(fep->hwp + FEC_ATIME_CTRL); + tempval |= FEC_T_CTRL_CAPTURE; + writel(tempval, fep->hwp + FEC_ATIME_CTRL); + + if (fep->quirks & FEC_QUIRK_BUG_CAPTURE) + udelay(1); + + return readl(fep->hwp + FEC_ATIME); +} + +/** * fec_ptp_enable_pps * @fep: the fec_enet_private structure handle * @enable: enable the channel pps output @@ -136,7 +160,7 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable) * NSEC_PER_SEC - ts.tv_nsec. Add the remaining nanoseconds * to current timer would be next second. */ - tempval = fep->cc.read(&fep->cc); + tempval = fec_ptp_read(&fep->cc); /* Convert the ptp local counter to 1588 timestamp */ ns = timecounter_cyc2time(&fep->tc, tempval); ts = ns_to_timespec64(ns); @@ -211,13 +235,7 @@ static int fec_ptp_pps_perout(struct fec_enet_private *fep) timecounter_read(&fep->tc); /* Get the current ptp hardware time counter */ - temp_val = readl(fep->hwp + FEC_ATIME_CTRL); - temp_val |= FEC_T_CTRL_CAPTURE; - writel(temp_val, fep->hwp + FEC_ATIME_CTRL); - if (fep->quirks & FEC_QUIRK_BUG_CAPTURE) - udelay(1); - - ptp_hc = readl(fep->hwp + FEC_ATIME); + ptp_hc = fec_ptp_read(&fep->cc); /* Convert the ptp local counter to 1588 timestamp */ curr_time = timecounter_cyc2time(&fep->tc, ptp_hc); @@ -272,30 +290,6 @@ static enum hrtimer_restart fec_ptp_pps_perout_handler(struct hrtimer *timer) } /** - * fec_ptp_read - read raw cycle counter (to be used by time counter) - * @cc: the cyclecounter structure - * - * this function reads the cyclecounter registers and is called by the - * cyclecounter structure used to construct a ns counter from the - * arbitrary fixed point registers - */ -static u64 fec_ptp_read(const struct cyclecounter *cc) -{ - struct fec_enet_private *fep = - container_of(cc, struct fec_enet_private, cc); - u32 tempval; - - tempval = readl(fep->hwp + FEC_ATIME_CTRL); - tempval |= FEC_T_CTRL_CAPTURE; - writel(tempval, fep->hwp + FEC_ATIME_CTRL); - - if (fep->quirks & FEC_QUIRK_BUG_CAPTURE) - udelay(1); - - return readl(fep->hwp + FEC_ATIME); -} - -/** * fec_ptp_start_cyclecounter - create the cycle counter from hw * @ndev: network device * @@ -775,6 +769,9 @@ void fec_ptp_stop(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + if (fep->pps_enable) + fec_ptp_enable_pps(fep, 0); + cancel_delayed_work_sync(&fep->time_keep); hrtimer_cancel(&fep->perout_timer); if (fep->ptp_clock) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 406e75e9e5ea..f17a4e511510 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1748,7 +1748,7 @@ static int fman_port_probe(struct platform_device *of_dev) struct resource res; struct resource *dev_res; u32 val; - int err = 0, lenp; + int err = 0; enum fman_port_type port_type; u16 port_speed; u8 port_id; @@ -1795,7 +1795,7 @@ static int fman_port_probe(struct platform_device *of_dev) if (of_device_is_compatible(port_node, "fsl,fman-v3-port-tx")) { port_type = FMAN_PORT_TYPE_TX; port_speed = 1000; - if (of_find_property(port_node, "fsl,fman-10g-port", &lenp)) + if (of_property_read_bool(port_node, 
"fsl,fman-10g-port")) port_speed = 10000; } else if (of_device_is_compatible(port_node, "fsl,fman-v2-port-tx")) { @@ -1808,7 +1808,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else if (of_device_is_compatible(port_node, "fsl,fman-v3-port-rx")) { port_type = FMAN_PORT_TYPE_RX; port_speed = 1000; - if (of_find_property(port_node, "fsl,fman-10g-port", &lenp)) + if (of_property_read_bool(port_node, "fsl,fman-10g-port")) port_speed = 10000; } else if (of_device_is_compatible(port_node, "fsl,fman-v2-port-rx")) { diff --git a/drivers/net/ethernet/freescale/fs_enet/Kconfig b/drivers/net/ethernet/freescale/fs_enet/Kconfig index 7f20840fde07..57013bf14d7c 100644 --- a/drivers/net/ethernet/freescale/fs_enet/Kconfig +++ b/drivers/net/ethernet/freescale/fs_enet/Kconfig @@ -3,7 +3,7 @@ config FS_ENET tristate "Freescale Ethernet Driver" depends on NET_VENDOR_FREESCALE && (CPM1 || CPM2 || PPC_MPC512x) select MII - select PHYLIB + select PHYLINK config FS_ENET_MPC5121_FEC def_bool y if (FS_ENET && PPC_MPC512x) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index cf392faa6105..3425c4a6abcb 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Combined Ethernet driver for Motorola MPC8xx and MPC82xx. * @@ -9,10 +10,6 @@ * * Heavily based on original FEC driver by Dan Malek <dan@embeddededge.com> * and modifications by Joakim Tjernlund <joakim.tjernlund@lumentis.se> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
*/ #include <linux/module.h> @@ -29,17 +26,18 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/spinlock.h> -#include <linux/mii.h> #include <linux/ethtool.h> #include <linux/bitops.h> #include <linux/fs.h> #include <linux/platform_device.h> #include <linux/phy.h> +#include <linux/phylink.h> #include <linux/property.h> #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/pgtable.h> +#include <linux/rtnetlink.h> #include <linux/vmalloc.h> #include <asm/irq.h> @@ -72,6 +70,13 @@ static void fs_set_multicast_list(struct net_device *dev) (*fep->ops->set_multicast_list)(dev); } +static int fs_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct fs_enet_private *fep = netdev_priv(dev); + + return phylink_mii_ioctl(fep->phylink, ifr, cmd); +} + static void skb_align(struct sk_buff *skb, int align) { int off = ((unsigned long)skb->data) & (align - 1); @@ -84,15 +89,14 @@ static void skb_align(struct sk_buff *skb, int align) static int fs_enet_napi(struct napi_struct *napi, int budget) { struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi); - struct net_device *dev = fep->ndev; const struct fs_platform_info *fpi = fep->fpi; - cbd_t __iomem *bdp; + struct net_device *dev = fep->ndev; + int curidx, dirtyidx, received = 0; + int do_wake = 0, do_restart = 0; + int tx_left = TX_RING_SIZE; struct sk_buff *skb, *skbn; - int received = 0; + cbd_t __iomem *bdp; u16 pkt_len, sc; - int curidx; - int dirtyidx, do_wake, do_restart; - int tx_left = TX_RING_SIZE; spin_lock(&fep->tx_lock); bdp = fep->dirty_tx; @@ -100,7 +104,6 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) /* clear status bits for napi*/ (*fep->ops->napi_clear_event)(dev); - do_wake = do_restart = 0; while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0 && tx_left) { dirtyidx = bdp - fep->tx_bd_base; @@ -109,12 +112,9 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) skb = fep->tx_skbuff[dirtyidx]; - /* - * Check for errors. - */ + /* Check for errors. */ if (sc & (BD_ENET_TX_HB | BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) { - if (sc & BD_ENET_TX_HB) /* No heartbeat */ dev->stats.tx_heartbeat_errors++; if (sc & BD_ENET_TX_LC) /* Late collision */ @@ -130,16 +130,16 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) dev->stats.tx_errors++; do_restart = 1; } - } else + } else { dev->stats.tx_packets++; + } if (sc & BD_ENET_TX_READY) { dev_warn(fep->dev, "HEY! Enet xmit interrupt and TX_READY.\n"); } - /* - * Deferred means some collisions occurred during transmit, + /* Deferred means some collisions occurred during transmit, * but we eventually sent the packet OK. */ if (sc & BD_ENET_TX_DEF) @@ -153,25 +153,20 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), CBDR_DATLEN(bdp), DMA_TO_DEVICE); - /* - * Free the sk buffer associated with this last transmit. - */ + /* Free the sk buffer associated with this last transmit. */ if (skb) { dev_kfree_skb(skb); fep->tx_skbuff[dirtyidx] = NULL; } - /* - * Update pointer to next buffer descriptor to be transmitted. + /* Update pointer to next buffer descriptor to be transmitted. */ if ((sc & BD_ENET_TX_WRAP) == 0) bdp++; else bdp = fep->tx_bd_base; - /* - * Since we have freed up a buffer, the ring is no longer - * full. + /* Since we have freed up a buffer, the ring is no longer full. 
*/ if (++fep->tx_free == MAX_SKB_FRAGS) do_wake = 1; @@ -188,8 +183,7 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) if (do_wake) netif_wake_queue(dev); - /* - * First, grab all of the stats for the incoming packet. + /* First, grab all of the stats for the incoming packet. * These get messed up if we get called due to a busy condition. */ bdp = fep->cur_rx; @@ -198,16 +192,13 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) received < budget) { curidx = bdp - fep->rx_bd_base; - /* - * Since we have allocated space to hold a complete frame, + /* Since we have allocated space to hold a complete frame, * the last indicator should be set. */ if ((sc & BD_ENET_RX_LAST) == 0) dev_warn(fep->dev, "rcv is not +last\n"); - /* - * Check for errors. - */ + /* Check for errors. */ if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL | BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) { dev->stats.rx_errors++; @@ -228,9 +219,7 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) } else { skb = fep->rx_skbuff[curidx]; - /* - * Process the incoming frame. - */ + /* Process the incoming frame */ dev->stats.rx_packets++; pkt_len = CBDR_DATLEN(bdp) - 4; /* remove CRC */ dev->stats.rx_bytes += pkt_len + 4; @@ -238,15 +227,15 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) if (pkt_len <= fpi->rx_copybreak) { /* +2 to make IP header L1 cache aligned */ skbn = netdev_alloc_skb(dev, pkt_len + 2); - if (skbn != NULL) { + if (skbn) { skb_reserve(skbn, 2); /* align IP header */ - skb_copy_from_linear_data(skb, - skbn->data, pkt_len); + skb_copy_from_linear_data(skb, skbn->data, + pkt_len); swap(skb, skbn); dma_sync_single_for_cpu(fep->dev, - CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(pkt_len), - DMA_FROM_DEVICE); + CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(pkt_len), + DMA_FROM_DEVICE); } } else { skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE); @@ -256,20 +245,18 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) skb_align(skbn, ENET_RX_ALIGN); - dma_unmap_single(fep->dev, - CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); + dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); - dma = dma_map_single(fep->dev, - skbn->data, - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); + dma = dma_map_single(fep->dev, skbn->data, + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); CBDW_BUFADDR(bdp, dma); } } - if (skbn != NULL) { + if (skbn) { skb_put(skb, pkt_len); /* Make room */ skb->protocol = eth_type_trans(skb, dev); received++; @@ -284,9 +271,7 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) CBDW_DATLEN(bdp, 0); CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY); - /* - * Update BD pointer to next entry. - */ + /* Update BD pointer to next entry */ if ((sc & BD_ENET_RX_WRAP) == 0) bdp++; else @@ -308,19 +293,16 @@ static int fs_enet_napi(struct napi_struct *napi, int budget) return budget; } -/* - * The interrupt handler. +/* The interrupt handler. * This is called from the MPC core interrupt. 
*/ static irqreturn_t fs_enet_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; + u32 int_events, int_clr_events; struct fs_enet_private *fep; - u32 int_events; - u32 int_clr_events; - int nr, napi_ok; - int handled; + int nr, napi_ok, handled; fep = netdev_priv(dev); @@ -342,12 +324,12 @@ fs_enet_interrupt(int irq, void *dev_id) (*fep->ops->napi_disable)(dev); (*fep->ops->clear_int_events)(dev, fep->ev_napi); - /* NOTE: it is possible for FCCs in NAPI mode */ - /* to submit a spurious interrupt while in poll */ + /* NOTE: it is possible for FCCs in NAPI mode + * to submit a spurious interrupt while in poll + */ if (napi_ok) __napi_schedule(&fep->napi); } - } handled = nr > 0; @@ -357,45 +339,40 @@ fs_enet_interrupt(int irq, void *dev_id) void fs_init_bds(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); - cbd_t __iomem *bdp; struct sk_buff *skb; + cbd_t __iomem *bdp; int i; fs_cleanup_bds(dev); - fep->dirty_tx = fep->cur_tx = fep->tx_bd_base; + fep->dirty_tx = fep->tx_bd_base; + fep->cur_tx = fep->tx_bd_base; fep->tx_free = fep->tx_ring; fep->cur_rx = fep->rx_bd_base; - /* - * Initialize the receive buffer descriptors. - */ + /* Initialize the receive buffer descriptors */ for (i = 0, bdp = fep->rx_bd_base; i < fep->rx_ring; i++, bdp++) { skb = netdev_alloc_skb(dev, ENET_RX_FRSIZE); - if (skb == NULL) + if (!skb) break; skb_align(skb, ENET_RX_ALIGN); fep->rx_skbuff[i] = skb; - CBDW_BUFADDR(bdp, - dma_map_single(fep->dev, skb->data, - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE)); + CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skb->data, + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE)); CBDW_DATLEN(bdp, 0); /* zero */ CBDW_SC(bdp, BD_ENET_RX_EMPTY | ((i < fep->rx_ring - 1) ? 0 : BD_SC_WRAP)); } - /* - * if we failed, fillup remainder - */ + + /* if we failed, fillup remainder */ for (; i < fep->rx_ring; i++, bdp++) { fep->rx_skbuff[i] = NULL; CBDW_SC(bdp, (i < fep->rx_ring - 1) ? 0 : BD_SC_WRAP); } - /* - * ...and the same for transmit. - */ + /* ...and the same for transmit. */ for (i = 0, bdp = fep->tx_bd_base; i < fep->tx_ring; i++, bdp++) { fep->tx_skbuff[i] = NULL; CBDW_BUFADDR(bdp, 0); @@ -411,32 +388,30 @@ void fs_cleanup_bds(struct net_device *dev) cbd_t __iomem *bdp; int i; - /* - * Reset SKB transmit buffers. - */ + /* Reset SKB transmit buffers. */ for (i = 0, bdp = fep->tx_bd_base; i < fep->tx_ring; i++, bdp++) { - if ((skb = fep->tx_skbuff[i]) == NULL) + skb = fep->tx_skbuff[i]; + if (!skb) continue; /* unmap */ dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - skb->len, DMA_TO_DEVICE); + skb->len, DMA_TO_DEVICE); fep->tx_skbuff[i] = NULL; dev_kfree_skb(skb); } - /* - * Reset SKB receive buffers - */ + /* Reset SKB receive buffers */ for (i = 0, bdp = fep->rx_bd_base; i < fep->rx_ring; i++, bdp++) { - if ((skb = fep->rx_skbuff[i]) == NULL) + skb = fep->rx_skbuff[i]; + if (!skb) continue; /* unmap */ dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); fep->rx_skbuff[i] = NULL; @@ -444,12 +419,8 @@ void fs_cleanup_bds(struct net_device *dev) } } -/**********************************************************************************/ - #ifdef CONFIG_FS_ENET_MPC5121_FEC -/* - * MPC5121 FEC requeries 4-byte alignment for TX data buffer! - */ +/* MPC5121 FEC requires 4-byte alignment for TX data buffer! 
*/ static struct sk_buff *tx_skb_align_workaround(struct net_device *dev, struct sk_buff *skb) { @@ -481,15 +452,12 @@ static netdev_tx_t fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); + int curidx, nr_frags, len; cbd_t __iomem *bdp; - int curidx; - u16 sc; - int nr_frags; skb_frag_t *frag; - int len; + u16 sc; #ifdef CONFIG_FS_ENET_MPC5121_FEC - int is_aligned = 1; - int i; + int i, is_aligned = 1; if (!IS_ALIGNED((unsigned long)skb->data, 4)) { is_aligned = 0; @@ -507,8 +475,7 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!is_aligned) { skb = tx_skb_align_workaround(dev, skb); if (!skb) { - /* - * We have lost packet due to memory allocation error + /* We have lost packet due to memory allocation error * in tx_skb_align_workaround(). Hopefully original * skb is still valid, so try transmit it later. */ @@ -519,9 +486,7 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_lock(&fep->tx_lock); - /* - * Fill in a Tx ring entry - */ + /* Fill in a Tx ring entry */ bdp = fep->cur_tx; nr_frags = skb_shinfo(skb)->nr_frags; @@ -529,8 +494,7 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); spin_unlock(&fep->tx_lock); - /* - * Ooops. All transmit buffers are full. Bail out. + /* Ooops. All transmit buffers are full. Bail out. * This should not happen, since the tx queue should be stopped. */ dev_warn(fep->dev, "tx queue full!.\n"); @@ -543,12 +507,12 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += len; if (nr_frags) len -= skb->data_len; + fep->tx_free -= nr_frags + 1; - /* - * Push the data cache so the CPM does not get stale memory data. + /* Push the data cache so the CPM does not get stale memory data. */ CBDW_BUFADDR(bdp, dma_map_single(fep->dev, - skb->data, len, DMA_TO_DEVICE)); + skb->data, len, DMA_TO_DEVICE)); CBDW_DATLEN(bdp, len); fep->mapped_as_page[curidx] = 0; @@ -585,9 +549,11 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) /* note that while FEC does not have this bit * it marks it as available for software use - * yay for hw reuse :) */ + * yay for hw reuse :) + */ if (skb->len <= 60) sc |= BD_ENET_TX_PAD; + CBDC_SC(bdp, BD_ENET_TX_STATS); CBDS_SC(bdp, sc); @@ -599,6 +565,7 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) bdp++; else bdp = fep->tx_bd_base; + fep->cur_tx = bdp; if (fep->tx_free < MAX_SKB_FRAGS) @@ -623,15 +590,21 @@ static void fs_timeout_work(struct work_struct *work) dev->stats.tx_errors++; - spin_lock_irqsave(&fep->lock, flags); + /* In the event a timeout was detected, but the netdev is brought down + * shortly after, it no longer makes sense to try to recover from the + * timeout. netif_running() will return false when called from the + * .ndo_close() callback. Calling the following recovery code while + * called from .ndo_close() could deadlock on rtnl. 
+ */ + if (!netif_running(dev)) + return; - if (dev->flags & IFF_UP) { - phy_stop(dev->phydev); - (*fep->ops->stop)(dev); - (*fep->ops->restart)(dev); - } + rtnl_lock(); + phylink_stop(fep->phylink); + phylink_start(fep->phylink); + rtnl_unlock(); - phy_start(dev->phydev); + spin_lock_irqsave(&fep->lock, flags); wake = fep->tx_free >= MAX_SKB_FRAGS && !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); spin_unlock_irqrestore(&fep->lock, flags); @@ -647,82 +620,37 @@ static void fs_timeout(struct net_device *dev, unsigned int txqueue) schedule_work(&fep->timeout_work); } -/*----------------------------------------------------------------------------- - * generic link-change handler - should be sufficient for most cases - *-----------------------------------------------------------------------------*/ -static void generic_adjust_link(struct net_device *dev) +static void fs_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) { - struct fs_enet_private *fep = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - int new_state = 0; - - if (phydev->link) { - /* adjust to duplex mode */ - if (phydev->duplex != fep->oldduplex) { - new_state = 1; - fep->oldduplex = phydev->duplex; - } - - if (phydev->speed != fep->oldspeed) { - new_state = 1; - fep->oldspeed = phydev->speed; - } - - if (!fep->oldlink) { - new_state = 1; - fep->oldlink = 1; - } - - if (new_state) - fep->ops->restart(dev); - } else if (fep->oldlink) { - new_state = 1; - fep->oldlink = 0; - fep->oldspeed = 0; - fep->oldduplex = -1; - } + struct net_device *ndev = to_net_dev(config->dev); + struct fs_enet_private *fep = netdev_priv(ndev); + unsigned long flags; - if (new_state && netif_msg_link(fep)) - phy_print_status(phydev); + spin_lock_irqsave(&fep->lock, flags); + fep->ops->restart(ndev, interface, speed, duplex); + spin_unlock_irqrestore(&fep->lock, flags); } - -static void fs_adjust_link(struct net_device *dev) +static void fs_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) { - struct fs_enet_private *fep = netdev_priv(dev); + struct net_device *ndev = to_net_dev(config->dev); + struct fs_enet_private *fep = netdev_priv(ndev); unsigned long flags; spin_lock_irqsave(&fep->lock, flags); - - if(fep->ops->adjust_link) - fep->ops->adjust_link(dev); - else - generic_adjust_link(dev); - + fep->ops->stop(ndev); spin_unlock_irqrestore(&fep->lock, flags); } -static int fs_init_phy(struct net_device *dev) +static void fs_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) { - struct fs_enet_private *fep = netdev_priv(dev); - struct phy_device *phydev; - phy_interface_t iface; - - fep->oldlink = 0; - fep->oldspeed = 0; - fep->oldduplex = -1; - - iface = fep->fpi->use_rmii ? - PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII; - - phydev = of_phy_connect(dev, fep->fpi->phy_node, &fs_adjust_link, 0, - iface); - if (!phydev) { - dev_err(&dev->dev, "Could not attach to PHY\n"); - return -ENODEV; - } - - return 0; + /* Nothing to do */ } static int fs_enet_open(struct net_device *dev) @@ -731,8 +659,9 @@ static int fs_enet_open(struct net_device *dev) int r; int err; - /* to initialize the fep->cur_rx,... */ - /* not doing this, will cause a crash in fs_enet_napi */ + /* to initialize the fep->cur_rx,... 
+ * not doing this, will cause a crash in fs_enet_napi + */ fs_init_bds(fep->ndev); napi_enable(&fep->napi); @@ -746,13 +675,13 @@ static int fs_enet_open(struct net_device *dev) return -EINVAL; } - err = fs_init_phy(dev); + err = phylink_of_phy_connect(fep->phylink, fep->dev->of_node, 0); if (err) { free_irq(fep->interrupt, dev); napi_disable(&fep->napi); return err; } - phy_start(dev->phydev); + phylink_start(fep->phylink); netif_start_queue(dev); @@ -765,28 +694,25 @@ static int fs_enet_close(struct net_device *dev) unsigned long flags; netif_stop_queue(dev); - netif_carrier_off(dev); napi_disable(&fep->napi); - cancel_work_sync(&fep->timeout_work); - phy_stop(dev->phydev); + cancel_work(&fep->timeout_work); + phylink_stop(fep->phylink); spin_lock_irqsave(&fep->lock, flags); spin_lock(&fep->tx_lock); (*fep->ops->stop)(dev); spin_unlock(&fep->tx_lock); spin_unlock_irqrestore(&fep->lock, flags); + phylink_disconnect_phy(fep->phylink); /* release any irqs */ - phy_disconnect(dev->phydev); free_irq(fep->interrupt, dev); return 0; } -/*************************************************************************/ - static void fs_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) + struct ethtool_drvinfo *info) { strscpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); } @@ -799,7 +725,7 @@ static int fs_get_regs_len(struct net_device *dev) } static void fs_get_regs(struct net_device *dev, struct ethtool_regs *regs, - void *p) + void *p) { struct fs_enet_private *fep = netdev_priv(dev); unsigned long flags; @@ -818,12 +744,14 @@ static void fs_get_regs(struct net_device *dev, struct ethtool_regs *regs, static u32 fs_get_msglevel(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); + return fep->msg_enable; } static void fs_set_msglevel(struct net_device *dev, u32 value) { struct fs_enet_private *fep = netdev_priv(dev); + fep->msg_enable = value; } @@ -865,6 +793,22 @@ static int fs_set_tunable(struct net_device *dev, return ret; } +static int fs_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct fs_enet_private *fep = netdev_priv(dev); + + return phylink_ethtool_ksettings_set(fep->phylink, cmd); +} + +static int fs_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct fs_enet_private *fep = netdev_priv(dev); + + return phylink_ethtool_ksettings_get(fep->phylink, cmd); +} + static const struct ethtool_ops fs_ethtool_ops = { .get_drvinfo = fs_get_drvinfo, .get_regs_len = fs_get_regs_len, @@ -874,14 +818,12 @@ static const struct ethtool_ops fs_ethtool_ops = { .set_msglevel = fs_set_msglevel, .get_regs = fs_get_regs, .get_ts_info = ethtool_op_get_ts_info, - .get_link_ksettings = phy_ethtool_get_link_ksettings, - .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_link_ksettings = fs_ethtool_get_link_ksettings, + .set_link_ksettings = fs_ethtool_set_link_ksettings, .get_tunable = fs_get_tunable, .set_tunable = fs_set_tunable, }; -/**************************************************************************************/ - #ifdef CONFIG_FS_ENET_HAS_FEC #define IS_FEC(ops) ((ops) == &fs_fec_ops) #else @@ -894,7 +836,7 @@ static const struct net_device_ops fs_enet_netdev_ops = { .ndo_start_xmit = fs_enet_start_xmit, .ndo_tx_timeout = fs_timeout, .ndo_set_rx_mode = fs_set_multicast_list, - .ndo_eth_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = fs_eth_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef 
CONFIG_NET_POLL_CONTROLLER @@ -902,17 +844,23 @@ static const struct net_device_ops fs_enet_netdev_ops = { #endif }; +static const struct phylink_mac_ops fs_enet_phylink_mac_ops = { + .mac_config = fs_mac_config, + .mac_link_down = fs_mac_link_down, + .mac_link_up = fs_mac_link_up, +}; + static int fs_enet_probe(struct platform_device *ofdev) { + int privsize, len, ret = -ENODEV; + struct fs_platform_info *fpi; + struct fs_enet_private *fep; + phy_interface_t phy_mode; const struct fs_ops *ops; struct net_device *ndev; - struct fs_enet_private *fep; - struct fs_platform_info *fpi; + struct phylink *phylink; const u32 *data; struct clk *clk; - int err; - const char *phy_connection_type; - int privsize, len, ret = -ENODEV; ops = device_get_match_data(&ofdev->dev); if (!ops) @@ -930,51 +878,36 @@ static int fs_enet_probe(struct platform_device *ofdev) fpi->cp_command = *data; } + ret = of_get_phy_mode(ofdev->dev.of_node, &phy_mode); + if (ret) { + /* For compatibility, if the mode isn't specified in DT, + * assume MII + */ + phy_mode = PHY_INTERFACE_MODE_MII; + } + fpi->rx_ring = RX_RING_SIZE; fpi->tx_ring = TX_RING_SIZE; fpi->rx_copybreak = 240; fpi->napi_weight = 17; - fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0); - if (!fpi->phy_node && of_phy_is_fixed_link(ofdev->dev.of_node)) { - err = of_phy_register_fixed_link(ofdev->dev.of_node); - if (err) - goto out_free_fpi; - - /* In the case of a fixed PHY, the DT node associated - * to the PHY is the Ethernet MAC DT node. - */ - fpi->phy_node = of_node_get(ofdev->dev.of_node); - } - - if (of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc5125-fec")) { - phy_connection_type = of_get_property(ofdev->dev.of_node, - "phy-connection-type", NULL); - if (phy_connection_type && !strcmp("rmii", phy_connection_type)) - fpi->use_rmii = 1; - } /* make clock lookup non-fatal (the driver is shared among platforms), * but require enable to succeed when a clock was specified/found, * keep a reference to the clock upon successful acquisition */ - clk = devm_clk_get(&ofdev->dev, "per"); - if (!IS_ERR(clk)) { - ret = clk_prepare_enable(clk); - if (ret) - goto out_deregister_fixed_link; - - fpi->clk_per = clk; - } + clk = devm_clk_get_optional_enabled(&ofdev->dev, "per"); + if (IS_ERR(clk)) + goto out_free_fpi; privsize = sizeof(*fep) + - sizeof(struct sk_buff **) * + sizeof(struct sk_buff **) * (fpi->rx_ring + fpi->tx_ring) + sizeof(char) * fpi->tx_ring; ndev = alloc_etherdev(privsize); if (!ndev) { ret = -ENOMEM; - goto out_put; + goto out_free_fpi; } SET_NETDEV_DEV(ndev, &ofdev->dev); @@ -986,9 +919,29 @@ static int fs_enet_probe(struct platform_device *ofdev) fep->fpi = fpi; fep->ops = ops; + fep->phylink_config.dev = &ndev->dev; + fep->phylink_config.type = PHYLINK_NETDEV; + fep->phylink_config.mac_capabilities = MAC_10 | MAC_100; + + __set_bit(PHY_INTERFACE_MODE_MII, + fep->phylink_config.supported_interfaces); + + if (of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc5125-fec")) + __set_bit(PHY_INTERFACE_MODE_RMII, + fep->phylink_config.supported_interfaces); + + phylink = phylink_create(&fep->phylink_config, dev_fwnode(fep->dev), + phy_mode, &fs_enet_phylink_mac_ops); + if (IS_ERR(phylink)) { + ret = PTR_ERR(phylink); + goto out_free_dev; + } + + fep->phylink = phylink; + ret = fep->ops->setup_data(ndev); if (ret) - goto out_free_dev; + goto out_phylink; fep->rx_skbuff = (struct sk_buff **)&fep[1]; fep->tx_skbuff = fep->rx_skbuff + fpi->rx_ring; @@ -1018,8 +971,6 @@ static int fs_enet_probe(struct platform_device *ofdev) 
ndev->ethtool_ops = &fs_ethtool_ops; - netif_carrier_off(ndev); - ndev->features |= NETIF_F_SG; ret = register_netdev(ndev); @@ -1034,14 +985,10 @@ out_free_bd: fep->ops->free_bd(ndev); out_cleanup_data: fep->ops->cleanup_data(ndev); +out_phylink: + phylink_destroy(fep->phylink); out_free_dev: free_netdev(ndev); -out_put: - clk_disable_unprepare(fpi->clk_per); -out_deregister_fixed_link: - of_node_put(fpi->phy_node); - if (of_phy_is_fixed_link(ofdev->dev.of_node)) - of_phy_deregister_fixed_link(ofdev->dev.of_node); out_free_fpi: kfree(fpi); return ret; @@ -1057,10 +1004,7 @@ static void fs_enet_remove(struct platform_device *ofdev) fep->ops->free_bd(ndev); fep->ops->cleanup_data(ndev); dev_set_drvdata(fep->dev, NULL); - of_node_put(fep->fpi->phy_node); - clk_disable_unprepare(fep->fpi->clk_per); - if (of_phy_is_fixed_link(ofdev->dev.of_node)) - of_phy_deregister_fixed_link(ofdev->dev.of_node); + phylink_destroy(fep->phylink); free_netdev(ndev); } @@ -1114,9 +1058,9 @@ static struct platform_driver fs_enet_driver = { #ifdef CONFIG_NET_POLL_CONTROLLER static void fs_enet_netpoll(struct net_device *dev) { - disable_irq(dev->irq); - fs_enet_interrupt(dev->irq, dev); - enable_irq(dev->irq); + disable_irq(dev->irq); + fs_enet_interrupt(dev->irq, dev); + enable_irq(dev->irq); } #endif diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index 21c07ac05225..36e4fcc29e36 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -3,11 +3,11 @@ #define FS_ENET_H #include <linux/clk.h> -#include <linux/mii.h> #include <linux/netdevice.h> #include <linux/types.h> #include <linux/list.h> #include <linux/phy.h> +#include <linux/phylink.h> #include <linux/dma-mapping.h> #ifdef CONFIG_CPM1 @@ -77,8 +77,8 @@ struct fs_ops { void (*free_bd)(struct net_device *dev); void (*cleanup_data)(struct net_device *dev); void (*set_multicast_list)(struct net_device *dev); - void (*adjust_link)(struct net_device *dev); - void (*restart)(struct net_device *dev); + void (*restart)(struct net_device *dev, phy_interface_t interface, + int speed, int duplex); void (*stop)(struct net_device *dev); void (*napi_clear_event)(struct net_device *dev); void (*napi_enable)(struct net_device *dev); @@ -93,14 +93,6 @@ struct fs_ops { void (*tx_restart)(struct net_device *dev); }; -struct phy_info { - unsigned int id; - const char *name; - void (*startup) (struct net_device * dev); - void (*shutdown) (struct net_device * dev); - void (*ack_int) (struct net_device * dev); -}; - /* The FEC stores dest/src/type, data, and checksum for receive packets. 
*/ #define MAX_MTU 1508 /* Allow fullsized pppoe packets over VLAN */ @@ -122,15 +114,9 @@ struct fs_platform_info { u32 dpram_offset; - struct device_node *phy_node; - int rx_ring, tx_ring; /* number of buffers on rx */ int rx_copybreak; /* limit we copy small frames */ int napi_weight; /* NAPI weight */ - - int use_rmii; /* use RMII mode */ - - struct clk *clk_per; /* 'per' clock for register access */ }; struct fs_enet_private { @@ -154,14 +140,11 @@ struct fs_enet_private { cbd_t __iomem *cur_rx; cbd_t __iomem *cur_tx; int tx_free; - const struct phy_info *phy; u32 msg_enable; - struct mii_if_info mii_if; - unsigned int last_mii_status; + struct phylink *phylink; + struct phylink_config phylink_config; int interrupt; - int oldduplex, oldspeed, oldlink; /* current settings */ - /* event masks */ u32 ev_napi; /* mask of NAPI events */ u32 ev; /* event mask */ diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index e2ffac9eb2ad..be63293511d9 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * FCC driver for Motorola MPC82xx (PQ2). * @@ -6,10 +7,6 @@ * * 2005 (c) MontaVista Software, Inc. * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include <linux/module.h> @@ -25,7 +22,6 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/spinlock.h> -#include <linux/mii.h> #include <linux/ethtool.h> #include <linux/bitops.h> #include <linux/fs.h> @@ -239,7 +235,8 @@ static void set_multicast_list(struct net_device *dev) set_promiscuous_mode(dev); } -static void restart(struct net_device *dev) +static void restart(struct net_device *dev, phy_interface_t interface, + int speed, int duplex) { struct fs_enet_private *fep = netdev_priv(dev); const struct fs_platform_info *fpi = fep->fpi; @@ -363,8 +360,8 @@ static void restart(struct net_device *dev) fs_init_bds(dev); /* adjust to speed (for RMII mode) */ - if (fpi->use_rmii) { - if (dev->phydev->speed == 100) + if (interface == PHY_INTERFACE_MODE_RMII) { + if (speed == SPEED_100) C8(fcccp, fcc_gfemr, 0x20); else S8(fcccp, fcc_gfemr, 0x20); @@ -386,11 +383,11 @@ static void restart(struct net_device *dev) W32(fccp, fcc_fpsmr, FCC_PSMR_ENCRC); - if (fpi->use_rmii) + if (interface == PHY_INTERFACE_MODE_RMII) S32(fccp, fcc_fpsmr, FCC_PSMR_RMII); /* adjust to duplex mode */ - if (dev->phydev->duplex) + if (duplex == DUPLEX_FULL) S32(fccp, fcc_fpsmr, FCC_PSMR_FDE | FCC_PSMR_LPB); else C32(fccp, fcc_fpsmr, FCC_PSMR_FDE | FCC_PSMR_LPB); diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index cdc89d83cf07..f2ecd20027cf 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Freescale Ethernet controllers * @@ -6,10 +7,6 @@ * * 2005 (c) MontaVista Software, Inc. * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
*/ #include <linux/module.h> @@ -26,7 +23,6 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/spinlock.h> -#include <linux/mii.h> #include <linux/ethtool.h> #include <linux/bitops.h> #include <linux/fs.h> @@ -224,7 +220,8 @@ static void set_multicast_list(struct net_device *dev) set_promiscuous_mode(dev); } -static void restart(struct net_device *dev) +static void restart(struct net_device *dev, phy_interface_t interface, + int speed, int duplex) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; @@ -306,13 +303,13 @@ static void restart(struct net_device *dev) * Only set MII/RMII mode - do not touch maximum frame length * configured before. */ - FS(fecp, r_cntrl, fpi->use_rmii ? - FEC_RCNTRL_RMII_MODE : FEC_RCNTRL_MII_MODE); + FS(fecp, r_cntrl, interface == PHY_INTERFACE_MODE_RMII ? + FEC_RCNTRL_RMII_MODE : FEC_RCNTRL_MII_MODE); #endif /* * adjust to duplex mode */ - if (dev->phydev->duplex) { + if (duplex == DUPLEX_FULL) { FC(fecp, r_cntrl, FEC_RCNTRL_DRT); FS(fecp, x_cntrl, FEC_TCNTRL_FDEN); /* FD enable */ } else { diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index a64cb6270515..6c97191649de 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Ethernet on Serial Communications Controller (SCC) driver for Motorola MPC8xx and MPC82xx. * @@ -6,10 +7,6 @@ * * 2005 (c) MontaVista Software, Inc. * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include <linux/module.h> @@ -25,7 +22,6 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/spinlock.h> -#include <linux/mii.h> #include <linux/ethtool.h> #include <linux/bitops.h> #include <linux/fs.h> @@ -131,15 +127,14 @@ static int setup_data(struct net_device *dev) static int allocate_bd(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); - const struct fs_platform_info *fpi = fep->fpi; + struct fs_platform_info *fpi = fep->fpi; - fep->ring_mem_addr = cpm_muram_alloc((fpi->tx_ring + fpi->rx_ring) * - sizeof(cbd_t), 8); - if (IS_ERR_VALUE(fep->ring_mem_addr)) + fpi->dpram_offset = cpm_muram_alloc((fpi->tx_ring + fpi->rx_ring) * + sizeof(cbd_t), 8); + if (IS_ERR_VALUE(fpi->dpram_offset)) return -ENOMEM; - fep->ring_base = (void __iomem __force*) - cpm_muram_addr(fep->ring_mem_addr); + fep->ring_base = cpm_muram_addr(fpi->dpram_offset); return 0; } @@ -147,9 +142,10 @@ static int allocate_bd(struct net_device *dev) static void free_bd(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); + const struct fs_platform_info *fpi = fep->fpi; if (fep->ring_base) - cpm_muram_free(fep->ring_mem_addr); + cpm_muram_free(fpi->dpram_offset); } static void cleanup_data(struct net_device *dev) @@ -230,7 +226,8 @@ static void set_multicast_list(struct net_device *dev) * change. This only happens when switching between half and full * duplex. 
*/ -static void restart(struct net_device *dev) +static void restart(struct net_device *dev, phy_interface_t interface, + int speed, int duplex) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; @@ -247,9 +244,9 @@ static void restart(struct net_device *dev) __fs_out8((u8 __iomem *)ep + i, 0); /* point to bds */ - W16(ep, sen_genscc.scc_rbase, fep->ring_mem_addr); + W16(ep, sen_genscc.scc_rbase, fpi->dpram_offset); W16(ep, sen_genscc.scc_tbase, - fep->ring_mem_addr + sizeof(cbd_t) * fpi->rx_ring); + fpi->dpram_offset + sizeof(cbd_t) * fpi->rx_ring); /* Initialize function code registers for big-endian. */ @@ -341,7 +338,7 @@ static void restart(struct net_device *dev) W16(sccp, scc_psmr, SCC_PSMR_ENCRC | SCC_PSMR_NIB22); /* Set full duplex mode if needed */ - if (dev->phydev->duplex) + if (duplex == DUPLEX_FULL) S16(sccp, scc_psmr, SCC_PSMR_LPB | SCC_PSMR_FDE); /* Restore multicast and promiscuous settings */ diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c index f965a2329055..2e210a003558 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Combined Ethernet driver for Motorola MPC8xx and MPC82xx. * @@ -6,10 +7,6 @@ * * 2005 (c) MontaVista Software, Inc. * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include <linux/module.h> diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index 7bb69727952a..93d91e8ad0de 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Combined Ethernet driver for Motorola MPC8xx and MPC82xx. * @@ -6,10 +7,6 @@ * * 2005 (c) MontaVista Software, Inc. * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
*/ #include <linux/module.h> diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 2baef59f741d..ecb1703ea150 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -754,6 +754,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING; err = of_get_ethdev_address(np, dev); + if (err == -EPROBE_DEFER) + goto err_grp_init; if (err) { eth_hw_addr_random(dev); dev_info(&ofdev->dev, "Using random MAC address: %pM\n", dev->dev_addr); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index f581402ad740..a99b95c4bcfb 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1455,12 +1455,8 @@ static int gfar_get_ts_info(struct net_device *dev, struct device_node *ptp_node; struct ptp_qoriq *ptp = NULL; - info->phc_index = -1; - if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)) { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } @@ -1478,9 +1474,7 @@ static int gfar_get_ts_info(struct net_device *dev, info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + SOF_TIMESTAMPING_TX_SOFTWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c index a7fbd4cd560a..ce97b76f9ae0 100644 --- a/drivers/net/ethernet/fungible/funcore/fun_dev.c +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c @@ -546,17 +546,14 @@ int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0, unsigned int id0, enum fun_admin_bind_type type1, unsigned int id1) { - struct { - struct fun_admin_bind_req req; - struct fun_admin_bind_entry entry[2]; - } cmd = { - .req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND, - sizeof(cmd)), - .entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0), - .entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1), - }; + DEFINE_RAW_FLEX(struct fun_admin_bind_req, cmd, entry, 2); + + cmd->common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND, + __struct_size(cmd)); + cmd->entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0); + cmd->entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1); - return fun_submit_admin_sync_cmd(fdev, &cmd.req.common, NULL, 0, 0); + return fun_submit_admin_sync_cmd(fdev, &cmd->common, NULL, 0, 0); } EXPORT_SYMBOL_GPL(fun_bind); diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c index 7f081e6e8c87..ba83dbf4ed22 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -1042,12 +1042,9 @@ static int fun_set_rxfh(struct net_device *netdev, static int fun_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_RX_HARDWARE | + info->so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; - 
info->phc_index = -1; info->tx_types = BIT(HWTSTAMP_TX_OFF); info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); return 0; diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 84ac004d3953..301fa1ea4f51 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -784,6 +784,8 @@ struct gve_priv { u32 adminq_verify_driver_compatibility_cnt; u32 adminq_query_flow_rules_cnt; u32 adminq_cfg_flow_rule_cnt; + u32 adminq_cfg_rss_cnt; + u32 adminq_query_rss_cnt; /* Global stats */ u32 interface_up_cnt; /* count of times interface turned up since last reset */ @@ -831,6 +833,9 @@ struct gve_priv { u32 num_flow_rules; struct gve_flow_rules_cache flow_rules_cache; + + u16 rss_key_size; + u16 rss_lut_size; }; enum gve_service_task_flags_bit { @@ -1148,7 +1153,6 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, int idx); void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_alloc_rings_cfg *cfg); -int gve_rx_alloc_rings(struct gve_priv *priv); int gve_rx_alloc_rings_gqi(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); void gve_rx_free_rings_gqi(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index c5bbc1b7524e..e44e8b139633 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -45,6 +45,7 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, struct gve_device_option_flow_steering **dev_op_flow_steering, + struct gve_device_option_rss_config **dev_op_rss_config, struct gve_device_option_modify_ring **dev_op_modify_ring) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); @@ -207,6 +208,23 @@ void gve_parse_device_option(struct gve_priv *priv, "Flow Steering"); *dev_op_flow_steering = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_RSS_CONFIG: + if (option_length < sizeof(**dev_op_rss_config) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "RSS config", + (int)sizeof(**dev_op_rss_config), + GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_rss_config)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "RSS config"); + *dev_op_rss_config = (void *)(option + 1); + break; default: /* If we don't recognize the option just continue * without doing anything. 
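The GVE_DEV_OPT_ID_RSS_CONFIG case added above follows the driver's standard device-option validation: reject an option whose length cannot hold the expected struct or whose required-features mask demands something the driver lacks, and merely warn when the option is longer than expected. A minimal stand-alone sketch of that pattern, with simplified stand-in types and names (opt_hdr, opt_is_valid are illustrative, not the gve driver's):

	#include <stdint.h>
	#include <stddef.h>
	#include <stdbool.h>

	struct opt_hdr {
		uint16_t option_id;
		uint16_t option_length;		/* bytes following the header */
		uint32_t required_features_mask;
	};

	/* Accept an option only when it is long enough to hold the payload
	 * struct and its required-features mask matches what we support; a
	 * longer-than-expected option is tolerated (forward compatibility).
	 */
	static bool opt_is_valid(const struct opt_hdr *opt, size_t expected_len,
				 uint32_t supported_mask)
	{
		if (opt->option_length < expected_len)
			return false;	/* too short: skip this option */
		if (opt->required_features_mask != supported_mask)
			return false;	/* device demands unknown features */
		return true;
	}
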
@@ -227,6 +245,7 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, struct gve_device_option_flow_steering **dev_op_flow_steering, + struct gve_device_option_rss_config **dev_op_rss_config, struct gve_device_option_modify_ring **dev_op_modify_ring) { const int num_options = be16_to_cpu(descriptor->num_device_options); @@ -249,7 +268,8 @@ gve_process_device_options(struct gve_priv *priv, dev_op_gqi_rda, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames, dev_op_dqo_qpl, dev_op_buffer_sizes, - dev_op_flow_steering, dev_op_modify_ring); + dev_op_flow_steering, dev_op_rss_config, + dev_op_modify_ring); dev_opt = next_opt; } @@ -289,6 +309,8 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) priv->adminq_get_ptype_map_cnt = 0; priv->adminq_query_flow_rules_cnt = 0; priv->adminq_cfg_flow_rule_cnt = 0; + priv->adminq_cfg_rss_cnt = 0; + priv->adminq_query_rss_cnt = 0; /* Setup Admin queue with the device */ if (priv->pdev->revision < 0x1) { @@ -534,6 +556,12 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, case GVE_ADMINQ_CONFIGURE_FLOW_RULE: priv->adminq_cfg_flow_rule_cnt++; break; + case GVE_ADMINQ_CONFIGURE_RSS: + priv->adminq_cfg_rss_cnt++; + break; + case GVE_ADMINQ_QUERY_RSS: + priv->adminq_query_rss_cnt++; + break; default: dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); } @@ -867,6 +895,8 @@ static void gve_enable_supported_features(struct gve_priv *priv, *dev_op_buffer_sizes, const struct gve_device_option_flow_steering *dev_op_flow_steering, + const struct gve_device_option_rss_config + *dev_op_rss_config, const struct gve_device_option_modify_ring *dev_op_modify_ring) { @@ -931,6 +961,14 @@ static void gve_enable_supported_features(struct gve_priv *priv, priv->max_flow_rules); } } + + if (dev_op_rss_config && + (supported_features_mask & GVE_SUP_RSS_CONFIG_MASK)) { + priv->rss_key_size = + be16_to_cpu(dev_op_rss_config->hash_key_size); + priv->rss_lut_size = + be16_to_cpu(dev_op_rss_config->hash_lut_size); + } } int gve_adminq_describe_device(struct gve_priv *priv) @@ -939,6 +977,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; struct gve_device_option_modify_ring *dev_op_modify_ring = NULL; + struct gve_device_option_rss_config *dev_op_rss_config = NULL; struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; @@ -973,6 +1012,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) &dev_op_jumbo_frames, &dev_op_dqo_qpl, &dev_op_buffer_sizes, &dev_op_flow_steering, + &dev_op_rss_config, &dev_op_modify_ring); if (err) goto free_device_descriptor; @@ -1035,7 +1075,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) gve_enable_supported_features(priv, supported_features_mask, dev_op_jumbo_frames, dev_op_dqo_qpl, dev_op_buffer_sizes, dev_op_flow_steering, - dev_op_modify_ring); + dev_op_rss_config, dev_op_modify_ring); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); @@ -1248,6 +1288,81 @@ int gve_adminq_reset_flow_rules(struct gve_priv *priv) return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd); } +int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh) +{ + dma_addr_t lut_bus = 0, key_bus = 0; + 
u16 key_size = 0, lut_size = 0; + union gve_adminq_command cmd; + __be32 *lut = NULL; + u8 hash_alg = 0; + u8 *key = NULL; + int err = 0; + u16 i; + + switch (rxfh->hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + break; + case ETH_RSS_HASH_TOP: + hash_alg = ETH_RSS_HASH_TOP; + break; + default: + return -EOPNOTSUPP; + } + + if (rxfh->indir) { + lut_size = priv->rss_lut_size; + lut = dma_alloc_coherent(&priv->pdev->dev, + lut_size * sizeof(*lut), + &lut_bus, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + for (i = 0; i < priv->rss_lut_size; i++) + lut[i] = cpu_to_be32(rxfh->indir[i]); + } + + if (rxfh->key) { + key_size = priv->rss_key_size; + key = dma_alloc_coherent(&priv->pdev->dev, + key_size, &key_bus, GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto out; + } + + memcpy(key, rxfh->key, key_size); + } + + /* Zero-valued fields in the cmd.configure_rss instruct the device to + * not update those fields. + */ + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_RSS); + cmd.configure_rss = (struct gve_adminq_configure_rss) { + .hash_types = cpu_to_be16(BIT(GVE_RSS_HASH_TCPV4) | + BIT(GVE_RSS_HASH_UDPV4) | + BIT(GVE_RSS_HASH_TCPV6) | + BIT(GVE_RSS_HASH_UDPV6)), + .hash_alg = hash_alg, + .hash_key_size = cpu_to_be16(key_size), + .hash_lut_size = cpu_to_be16(lut_size), + .hash_key_addr = cpu_to_be64(key_bus), + .hash_lut_addr = cpu_to_be64(lut_bus), + }; + + err = gve_adminq_execute_cmd(priv, &cmd); + +out: + if (lut) + dma_free_coherent(&priv->pdev->dev, + lut_size * sizeof(*lut), + lut, lut_bus); + if (key) + dma_free_coherent(&priv->pdev->dev, + key_size, key, key_bus); + return err; +} + /* In the dma memory that the driver allocated for the device to query the flow rules, the device * will first write it with a struct of gve_query_flow_rules_descriptor. Next to it, the device * will write an array of rules or rule ids with the count that specified in the descriptor. 
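gve_adminq_configure_rss() above hands the RSS key and indirection table to the device through DMA-coherent buffers, converting each LUT entry to big-endian first; zeroed command fields mean "leave unchanged" on the device side. The endianness step in isolation, as a user-space sketch (htonl() standing in for cpu_to_be32(); the buffer size is illustrative, the real one comes from priv->rss_lut_size):

	#include <stdint.h>
	#include <stddef.h>
	#include <arpa/inet.h>	/* htonl(): host order to 32-bit big-endian */

	/* Fill a device-visible buffer with the indirection table laid out
	 * big-endian, as the admin queue command expects.
	 */
	static void lut_to_device_order(uint32_t *dst_be,
					const uint32_t *src_cpu, size_t n)
	{
		for (size_t i = 0; i < n; i++)
			dst_be[i] = htonl(src_cpu[i]);
	}
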
@@ -1325,3 +1440,66 @@ out: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); return err; } + +static int gve_adminq_process_rss_query(struct gve_priv *priv, + struct gve_query_rss_descriptor *descriptor, + struct ethtool_rxfh_param *rxfh) +{ + u32 total_memory_length; + u16 hash_lut_length; + void *rss_info_addr; + __be32 *lut; + u16 i; + + total_memory_length = be32_to_cpu(descriptor->total_length); + hash_lut_length = priv->rss_lut_size * sizeof(*rxfh->indir); + + if (sizeof(*descriptor) + priv->rss_key_size + hash_lut_length != total_memory_length) { + dev_err(&priv->dev->dev, + "rss query desc from device has invalid length parameter.\n"); + return -EINVAL; + } + + rxfh->hfunc = descriptor->hash_alg; + + rss_info_addr = (void *)(descriptor + 1); + if (rxfh->key) + memcpy(rxfh->key, rss_info_addr, priv->rss_key_size); + + rss_info_addr += priv->rss_key_size; + lut = (__be32 *)rss_info_addr; + if (rxfh->indir) { + for (i = 0; i < priv->rss_lut_size; i++) + rxfh->indir[i] = be32_to_cpu(lut[i]); + } + + return 0; +} + +int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh) +{ + struct gve_query_rss_descriptor *descriptor; + union gve_adminq_command cmd; + dma_addr_t descriptor_bus; + int err = 0; + + descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus); + if (!descriptor) + return -ENOMEM; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_RSS); + cmd.query_rss = (struct gve_adminq_query_rss) { + .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE), + .rss_descriptor_addr = cpu_to_be64(descriptor_bus), + }; + err = gve_adminq_execute_cmd(priv, &cmd); + if (err) + goto out; + + err = gve_adminq_process_rss_query(priv, descriptor, rxfh); + +out: + dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); + return err; +} diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index ed1370c9b197..863683de9694 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -20,12 +20,14 @@ enum gve_adminq_opcodes { GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, + GVE_ADMINQ_CONFIGURE_RSS = 0xA, GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, GVE_ADMINQ_REPORT_STATS = 0xC, GVE_ADMINQ_REPORT_LINK_SPEED = 0xD, GVE_ADMINQ_GET_PTYPE_MAP = 0xE, GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, GVE_ADMINQ_QUERY_FLOW_RULES = 0x10, + GVE_ADMINQ_QUERY_RSS = 0x12, /* For commands that are larger than 56 bytes */ GVE_ADMINQ_EXTENDED_COMMAND = 0xFF, @@ -164,6 +166,14 @@ struct gve_device_option_flow_steering { static_assert(sizeof(struct gve_device_option_flow_steering) == 12); +struct gve_device_option_rss_config { + __be32 supported_features_mask; + __be16 hash_key_size; + __be16 hash_lut_size; +}; + +static_assert(sizeof(struct gve_device_option_rss_config) == 8); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -182,6 +192,7 @@ enum gve_dev_opt_id { GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, GVE_DEV_OPT_ID_FLOW_STEERING = 0xb, + GVE_DEV_OPT_ID_RSS_CONFIG = 0xe, }; enum gve_dev_opt_req_feat_mask { @@ -194,6 +205,7 @@ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0, }; enum gve_sup_feature_mask { @@ -201,6 +213,7 
@@ enum gve_sup_feature_mask { GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, GVE_SUP_FLOW_STEERING_MASK = 1 << 5, + GVE_SUP_RSS_CONFIG_MASK = 1 << 7, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -214,6 +227,7 @@ enum gve_driver_capbility { gve_driver_capability_dqo_rda = 3, gve_driver_capability_alt_miss_compl = 4, gve_driver_capability_flexible_buffer_size = 5, + gve_driver_capability_flexible_rss_size = 6, }; #define GVE_CAP1(a) BIT((int)a) @@ -226,7 +240,8 @@ enum gve_driver_capbility { GVE_CAP1(gve_driver_capability_gqi_rda) | \ GVE_CAP1(gve_driver_capability_dqo_rda) | \ GVE_CAP1(gve_driver_capability_alt_miss_compl) | \ - GVE_CAP1(gve_driver_capability_flexible_buffer_size)) + GVE_CAP1(gve_driver_capability_flexible_buffer_size) | \ + GVE_CAP1(gve_driver_capability_flexible_rss_size)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 @@ -509,6 +524,44 @@ struct gve_adminq_query_flow_rules { static_assert(sizeof(struct gve_adminq_query_flow_rules) == 24); +enum gve_rss_hash_type { + GVE_RSS_HASH_IPV4, + GVE_RSS_HASH_TCPV4, + GVE_RSS_HASH_IPV6, + GVE_RSS_HASH_IPV6_EX, + GVE_RSS_HASH_TCPV6, + GVE_RSS_HASH_TCPV6_EX, + GVE_RSS_HASH_UDPV4, + GVE_RSS_HASH_UDPV6, + GVE_RSS_HASH_UDPV6_EX, +}; + +struct gve_adminq_configure_rss { + __be16 hash_types; + u8 hash_alg; + u8 reserved; + __be16 hash_key_size; + __be16 hash_lut_size; + __be64 hash_key_addr; + __be64 hash_lut_addr; +}; + +static_assert(sizeof(struct gve_adminq_configure_rss) == 24); + +struct gve_query_rss_descriptor { + __be32 total_length; + __be16 hash_types; + u8 hash_alg; + u8 reserved; +}; + +struct gve_adminq_query_rss { + __be64 available_length; + __be64 rss_descriptor_addr; +}; + +static_assert(sizeof(struct gve_adminq_query_rss) == 16); + union gve_adminq_command { struct { __be32 opcode; @@ -530,6 +583,8 @@ union gve_adminq_command { struct gve_adminq_verify_driver_compatibility verify_driver_compatibility; struct gve_adminq_query_flow_rules query_flow_rules; + struct gve_adminq_configure_rss configure_rss; + struct gve_adminq_query_rss query_rss; struct gve_adminq_extended_command extended_command; }; }; @@ -568,6 +623,8 @@ int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc); int gve_adminq_reset_flow_rules(struct gve_priv *priv); int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc); +int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); +int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); struct gve_ptype_lut; int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 3480ff5c7ed6..bdfc6e77b2af 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -75,7 +75,8 @@ static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { "adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt", "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt", - "adminq_query_flow_rules", "adminq_cfg_flow_rule", + "adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt", + "adminq_query_rss_cnt", }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -453,6 +454,8 @@ 
gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_get_ptype_map_cnt; data[i++] = priv->adminq_query_flow_rules_cnt; data[i++] = priv->adminq_cfg_flow_rule_cnt; + data[i++] = priv->adminq_cfg_rss_cnt; + data[i++] = priv->adminq_query_rss_cnt; } static void gve_get_channels(struct net_device *netdev, @@ -495,7 +498,7 @@ static int gve_set_channels(struct net_device *netdev, return -EINVAL; } - if (!netif_carrier_ok(netdev)) { + if (!netif_running(netdev)) { priv->tx_cfg.num_queues = new_tx; priv->rx_cfg.num_queues = new_rx; return 0; @@ -838,6 +841,41 @@ static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u return err; } +static u32 gve_get_rxfh_key_size(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->rss_key_size; +} + +static u32 gve_get_rxfh_indir_size(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->rss_lut_size; +} + +static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) +{ + struct gve_priv *priv = netdev_priv(netdev); + + if (!priv->rss_key_size || !priv->rss_lut_size) + return -EOPNOTSUPP; + + return gve_adminq_query_rss_config(priv, rxfh); +} + +static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + + if (!priv->rss_key_size || !priv->rss_lut_size) + return -EOPNOTSUPP; + + return gve_adminq_configure_rss(priv, rxfh); +} + const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, @@ -851,6 +889,10 @@ const struct ethtool_ops gve_ethtool_ops = { .get_channels = gve_get_channels, .set_rxnfc = gve_set_rxnfc, .get_rxnfc = gve_get_rxnfc, + .get_rxfh_indir_size = gve_get_rxfh_indir_size, + .get_rxfh_key_size = gve_get_rxfh_key_size, + .get_rxfh = gve_get_rxfh, + .set_rxfh = gve_set_rxfh, .get_link = ethtool_op_get_link, .get_coalesce = gve_get_coalesce, .set_coalesce = gve_set_coalesce, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9744b426940e..661566db68c8 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1566,7 +1566,7 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); @@ -1847,7 +1847,7 @@ int gve_adjust_queues(struct gve_priv *priv, rx_alloc_cfg.qcfg = &new_rx_config; tx_alloc_cfg.num_rings = new_tx_config.num_queues; - if (netif_carrier_ok(priv->dev)) { + if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2064,7 +2064,7 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { + if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) goto revert_features; @@ -2359,7 +2359,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2700,7 +2700,7 @@ static void gve_shutdown(struct 
pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); if (was_up && gve_close(priv->dev)) { @@ -2718,7 +2718,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index b91e7a06b97f..beb815e5289b 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -947,6 +947,7 @@ static int hip04_mac_probe(struct platform_device *pdev) priv->tx_coalesce_timer.function = tx_done; priv->map = syscon_node_to_regmap(arg.np); + of_node_put(arg.np); if (IS_ERR(priv->map)) { dev_warn(d, "no syscon hisilicon,hip04-ppe\n"); ret = PTR_ERR(priv->map); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index f75668c47935..58baac7103b3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -734,7 +734,7 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb, return -ENODATA; phy = get_phy_device(mdio, addr, is_c45); - if (!phy || IS_ERR(phy)) + if (IS_ERR_OR_NULL(phy)) return -EIO; phy->irq = mdio->irq[addr]; @@ -933,6 +933,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) mac_cb->cpld_ctrl = NULL; } else { syscon = syscon_node_to_regmap(cpld_args.np); + of_node_put(cpld_args.np); if (IS_ERR_OR_NULL(syscon)) { dev_dbg(mac_cb->dev, "no cpld-syscon found!\n"); mac_cb->cpld_ctrl = NULL; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a5fc0209d628..4cbc4d069a1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5724,6 +5724,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) + hns3_nic_net_stop(netdev); + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index e132c2f09560..cc7f46c0b35f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1598,8 +1598,7 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) { u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0}; struct hclge_mod_reg_common_msg msg; - u8 i, j, num; - u32 loop_time; + u8 i, j, num, loop_time; num = ARRAY_SIZE(hclge_ssu_reg_common_msg); for (i = 0; i < num; i++) { @@ -1609,7 +1608,8 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) loop_time = 1; loop_para[0] = 0; if (msg.need_para) { - loop_time = hdev->ae_dev->dev_specs.tnl_num; + loop_time = min(hdev->ae_dev->dev_specs.tnl_num, + HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE); for (j = 0; j < loop_time; j++) loop_para[j] = j + 1; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 82574ce0194f..bd86efd92a5a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -13,8 +13,9 @@ #include <linux/platform_device.h> #include <linux/if_vlan.h> #include <linux/crash_dump.h> -#include <net/ipv6.h> + #include <net/rtnetlink.h> + #include "hclge_cmd.h" #include "hclge_dcb.h" #include "hclge_main.h" @@ -2653,8 +2654,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + + if (ret) + return ret; + + hdev->hw.mac.req_speed = speed; + hdev->hw.mac.req_duplex = duplex; - return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + return 0; } static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) @@ -2956,17 +2966,20 @@ static int hclge_mac_init(struct hclge_dev *hdev) if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) hdev->hw.mac.duplex = HCLGE_MAC_FULL; - ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, - hdev->hw.mac.duplex, hdev->hw.mac.lane_num); - if (ret) - return ret; - if (hdev->hw.mac.support_autoneg) { ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); if (ret) return ret; } + if (!hdev->hw.mac.autoneg) { + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed, + hdev->hw.mac.req_duplex, + hdev->hw.mac.lane_num); + if (ret) + return ret; + } + mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { @@ -6278,15 +6291,15 @@ static void hclge_fd_get_ip4_tuple(struct ethtool_rx_flow_spec *fs, static void hclge_fd_get_tcpip6_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, u8 ip_proto) { - be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src, - IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src, - IPV6_SIZE); + ipv6_addr_be32_to_cpu(rule->tuples.src_ip, + fs->h_u.tcp_ip6_spec.ip6src); + ipv6_addr_be32_to_cpu(rule->tuples_mask.src_ip, + fs->m_u.tcp_ip6_spec.ip6src); - be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst, - IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst, - IPV6_SIZE); + ipv6_addr_be32_to_cpu(rule->tuples.dst_ip, + fs->h_u.tcp_ip6_spec.ip6dst); + ipv6_addr_be32_to_cpu(rule->tuples_mask.dst_ip, + fs->m_u.tcp_ip6_spec.ip6dst); rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc); @@ -6307,15 +6320,15 @@ static void hclge_fd_get_tcpip6_tuple(struct ethtool_rx_flow_spec *fs, static void hclge_fd_get_ip6_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { - be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src, - IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src, - IPV6_SIZE); + ipv6_addr_be32_to_cpu(rule->tuples.src_ip, + fs->h_u.usr_ip6_spec.ip6src); + ipv6_addr_be32_to_cpu(rule->tuples_mask.src_ip, + fs->m_u.usr_ip6_spec.ip6src); - be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst, - IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst, - IPV6_SIZE); + ipv6_addr_be32_to_cpu(rule->tuples.dst_ip, + fs->h_u.usr_ip6_spec.ip6dst); + ipv6_addr_be32_to_cpu(rule->tuples_mask.dst_ip, + fs->m_u.usr_ip6_spec.ip6dst); rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; 
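	/* The ipv6_addr_be32_to_cpu()/ipv6_addr_cpu_to_be32() helpers used
	 * throughout these hunks replace open-coded be32_to_cpu_array(...,
	 * IPV6_SIZE) calls with wrappers that bake in the 4-word IPv6
	 * length, so callers can no longer pass a wrong count by hand.
	 * A plausible shape (sketch, not necessarily the exact in-tree
	 * definition):
	 *
	 *	static inline void ipv6_addr_be32_to_cpu(u32 *dst,
	 *						 const __be32 *src)
	 *	{
	 *		be32_to_cpu_array(dst, src, IPV6_ADDR_WORDS);
	 *	}
	 */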
rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto; @@ -6744,21 +6757,19 @@ static void hclge_fd_get_tcpip6_info(struct hclge_fd_rule *rule, struct ethtool_tcpip6_spec *spec, struct ethtool_tcpip6_spec *spec_mask) { - cpu_to_be32_array(spec->ip6src, - rule->tuples.src_ip, IPV6_SIZE); - cpu_to_be32_array(spec->ip6dst, - rule->tuples.dst_ip, IPV6_SIZE); + ipv6_addr_cpu_to_be32(spec->ip6src, rule->tuples.src_ip); + ipv6_addr_cpu_to_be32(spec->ip6dst, rule->tuples.dst_ip); if (rule->unused_tuple & BIT(INNER_SRC_IP)) memset(spec_mask->ip6src, 0, sizeof(spec_mask->ip6src)); else - cpu_to_be32_array(spec_mask->ip6src, rule->tuples_mask.src_ip, - IPV6_SIZE); + ipv6_addr_cpu_to_be32(spec_mask->ip6src, + rule->tuples_mask.src_ip); if (rule->unused_tuple & BIT(INNER_DST_IP)) memset(spec_mask->ip6dst, 0, sizeof(spec_mask->ip6dst)); else - cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, - IPV6_SIZE); + ipv6_addr_cpu_to_be32(spec_mask->ip6dst, + rule->tuples_mask.dst_ip); spec->tclass = rule->tuples.ip_tos; spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? @@ -6777,19 +6788,19 @@ static void hclge_fd_get_ip6_info(struct hclge_fd_rule *rule, struct ethtool_usrip6_spec *spec, struct ethtool_usrip6_spec *spec_mask) { - cpu_to_be32_array(spec->ip6src, rule->tuples.src_ip, IPV6_SIZE); - cpu_to_be32_array(spec->ip6dst, rule->tuples.dst_ip, IPV6_SIZE); + ipv6_addr_cpu_to_be32(spec->ip6src, rule->tuples.src_ip); + ipv6_addr_cpu_to_be32(spec->ip6dst, rule->tuples.dst_ip); if (rule->unused_tuple & BIT(INNER_SRC_IP)) memset(spec_mask->ip6src, 0, sizeof(spec_mask->ip6src)); else - cpu_to_be32_array(spec_mask->ip6src, - rule->tuples_mask.src_ip, IPV6_SIZE); + ipv6_addr_cpu_to_be32(spec_mask->ip6src, + rule->tuples_mask.src_ip); if (rule->unused_tuple & BIT(INNER_DST_IP)) memset(spec_mask->ip6dst, 0, sizeof(spec_mask->ip6dst)); else - cpu_to_be32_array(spec_mask->ip6dst, - rule->tuples_mask.dst_ip, IPV6_SIZE); + ipv6_addr_cpu_to_be32(spec_mask->ip6dst, + rule->tuples_mask.dst_ip); spec->tclass = rule->tuples.ip_tos; spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? 
@@ -7007,7 +7018,7 @@ static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys, } else { int i; - for (i = 0; i < IPV6_SIZE; i++) { + for (i = 0; i < IPV6_ADDR_WORDS; i++) { tuples->src_ip[i] = be32_to_cpu(flow_ip6_src[i]); tuples->dst_ip[i] = be32_to_cpu(flow_ip6_dst[i]); } @@ -7262,14 +7273,14 @@ static int hclge_get_cls_key_ip(const struct flow_rule *flow, struct flow_match_ipv6_addrs match; flow_rule_match_ipv6_addrs(flow, &match); - be32_to_cpu_array(rule->tuples.src_ip, match.key->src.s6_addr32, - IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - match.mask->src.s6_addr32, IPV6_SIZE); - be32_to_cpu_array(rule->tuples.dst_ip, match.key->dst.s6_addr32, - IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - match.mask->dst.s6_addr32, IPV6_SIZE); + ipv6_addr_be32_to_cpu(rule->tuples.src_ip, + match.key->src.s6_addr32); + ipv6_addr_be32_to_cpu(rule->tuples_mask.src_ip, + match.mask->src.s6_addr32); + ipv6_addr_be32_to_cpu(rule->tuples.dst_ip, + match.key->dst.s6_addr32); + ipv6_addr_be32_to_cpu(rule->tuples_mask.dst_ip, + match.mask->dst.s6_addr32); } else { rule->unused_tuple |= BIT(INNER_SRC_IP); rule->unused_tuple |= BIT(INNER_DST_IP); @@ -11444,7 +11455,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_free_irq_vectors(pdev); - pci_release_mem_regions(pdev); + pci_release_regions(pdev); pci_disable_device(pdev); } @@ -11516,8 +11527,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static void hclge_clear_resetting_state(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index b5178b0f88b3..b9fc719880bb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -8,7 +8,9 @@ #include <linux/phy.h> #include <linux/if_vlan.h> #include <linux/kfifo.h> + #include <net/devlink.h> +#include <net/ipv6.h> #include "hclge_cmd.h" #include "hclge_ptp.h" @@ -718,15 +720,15 @@ struct hclge_fd_cfg { }; #define IPV4_INDEX 3 -#define IPV6_SIZE 4 + struct hclge_fd_rule_tuples { u8 src_mac[ETH_ALEN]; u8 dst_mac[ETH_ALEN]; /* Be compatible for ip address of both ipv4 and ipv6. * For ipv4 address, we store it in src/dst_ip[3]. 
*/ - u32 src_ip[IPV6_SIZE]; - u32 dst_ip[IPV6_SIZE]; + u32 src_ip[IPV6_ADDR_WORDS]; + u32 dst_ip[IPV6_ADDR_WORDS]; u16 src_port; u16 dst_port; u16 vlan_tag1; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 85fb11de43a1..80079657afeb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev) if (ret) netdev_err(netdev, "failed to adjust link.\n"); + hdev->hw.mac.req_speed = (u32)speed; + hdev->hw.mac.req_duplex = (u8)duplex; + ret = hclge_cfg_flowctrl(hdev); if (ret) netdev_err(netdev, "failed to configure flow control.\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 5fff8ed388f8..5505caea88e9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -389,16 +389,12 @@ int hclge_ptp_get_ts_info(struct hnae3_handle *handle, } info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (hdev->ptp->clock) info->phc_index = ptp_clock_index(hdev->ptp->clock); - else - info->phc_index = -1; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 3735d2fed11f..094a7c7b5592 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1747,8 +1747,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c index 65b9dcd38137..6db415d8b917 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c @@ -134,17 +134,17 @@ void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version, reg += hclgevf_reg_get_header(reg); /* fetching per-VF registers values from VF PCIe register space */ - reg_um = sizeof(cmdq_reg_addr_list) / sizeof(u32); + reg_um = ARRAY_SIZE(cmdq_reg_addr_list); reg += hclgevf_reg_get_tlv(HCLGEVF_REG_TAG_CMDQ, reg_um, reg); for (i = 0; i < reg_um; i++) *reg++ = hclgevf_read_dev(&hdev->hw, cmdq_reg_addr_list[i]); - reg_um = sizeof(common_reg_addr_list) / sizeof(u32); + reg_um = ARRAY_SIZE(common_reg_addr_list); reg += hclgevf_reg_get_tlv(HCLGEVF_REG_TAG_COMMON, reg_um, reg); for (i = 0; i < reg_um; i++) *reg++ = hclgevf_read_dev(&hdev->hw, common_reg_addr_list[i]); - reg_um = sizeof(ring_reg_addr_list) / sizeof(u32); + reg_um = ARRAY_SIZE(ring_reg_addr_list); for (j = 0; j < hdev->num_tqps; j++) { reg += hclgevf_reg_get_tlv(HCLGEVF_REG_TAG_RING, reg_um, reg); for (i = 0; i < reg_um; i++) @@ -153,7 +153,7 @@ void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version, HCLGEVF_RING_REG_OFFSET * j); } - reg_um = sizeof(tqp_intr_reg_addr_list) / 
sizeof(u32); + reg_um = ARRAY_SIZE(tqp_intr_reg_addr_list); for (j = 0; j < hdev->num_msi_used - 1; j++) { reg += hclgevf_reg_get_tlv(HCLGEVF_REG_TAG_TQP_INTR, reg_um, reg); for (i = 0; i < reg_um; i++) diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index ed73707176c1..8a047145f0c5 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -575,6 +575,7 @@ static int hns_mdio_probe(struct platform_device *pdev) MDIO_SC_RESET_ST; } } + of_node_put(reg_args.np); } else { dev_warn(&pdev->dev, "find syscon ret = %#x\n", ret); mdio_dev->subctrl_vbase = NULL; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c index 0304f03d4093..c559dd4291d3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c @@ -1471,7 +1471,6 @@ static void hinic_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct hinic_dev *nic_dev = netdev_priv(netdev); - char *p = (char *)data; u16 i, j; switch (stringset) { @@ -1479,31 +1478,19 @@ static void hinic_get_strings(struct net_device *netdev, memcpy(data, *hinic_test_strings, sizeof(hinic_test_strings)); return; case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(hinic_function_stats); i++) { - memcpy(p, hinic_function_stats[i].name, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ARRAY_SIZE(hinic_function_stats); i++) + ethtool_puts(&data, hinic_function_stats[i].name); - for (i = 0; i < ARRAY_SIZE(hinic_port_stats); i++) { - memcpy(p, hinic_port_stats[i].name, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ARRAY_SIZE(hinic_port_stats); i++) + ethtool_puts(&data, hinic_port_stats[i].name); - for (i = 0; i < nic_dev->num_qps; i++) { - for (j = 0; j < ARRAY_SIZE(hinic_tx_queue_stats); j++) { - sprintf(p, hinic_tx_queue_stats[j].name, i); - p += ETH_GSTRING_LEN; - } - } + for (i = 0; i < nic_dev->num_qps; i++) + for (j = 0; j < ARRAY_SIZE(hinic_tx_queue_stats); j++) + ethtool_sprintf(&data, hinic_tx_queue_stats[j].name, i); - for (i = 0; i < nic_dev->num_qps; i++) { - for (j = 0; j < ARRAY_SIZE(hinic_rx_queue_stats); j++) { - sprintf(p, hinic_rx_queue_stats[j].name, i); - p += ETH_GSTRING_LEN; - } - } + for (i = 0; i < nic_dev->num_qps; i++) + for (j = 0; j < ARRAY_SIZE(hinic_rx_queue_stats); j++) + ethtool_sprintf(&data, hinic_rx_queue_stats[j].name, i); return; default: diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 1e29e5c9a2df..c41c3f1cc506 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3063,14 +3063,13 @@ static void ehea_shutdown_single_port(struct ehea_port *port) static int ehea_setup_ports(struct ehea_adapter *adapter) { struct device_node *lhea_dn; - struct device_node *eth_dn = NULL; + struct device_node *eth_dn; const u32 *dn_log_port_id; int i = 0; lhea_dn = adapter->ofdev->dev.of_node; - while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) { - + for_each_child_of_node(lhea_dn, eth_dn) { dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", NULL); if (!dn_log_port_id) { @@ -3102,12 +3101,11 @@ static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter, u32 logical_port_id) { struct device_node *lhea_dn; - struct device_node *eth_dn = NULL; + struct device_node *eth_dn; const u32 *dn_log_port_id; lhea_dn = adapter->ofdev->dev.of_node; - while ((eth_dn = 
of_get_next_child(lhea_dn, eth_dn))) { - + for_each_child_of_node(lhea_dn, eth_dn) { dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", NULL); if (dn_log_port_id) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index a19d098f2e2b..dac570f3c110 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -32,7 +32,6 @@ #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/bitops.h> -#include <linux/workqueue.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> @@ -96,11 +95,6 @@ MODULE_LICENSE("GPL"); static u32 busy_phy_map; static DEFINE_MUTEX(emac_phy_map_lock); -/* This is the wait queue used to wait on any event related to probe, that - * is discovery of MALs, other EMACs, ZMII/RGMIIs, etc... - */ -static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); - /* Having stable interface names is a doomed idea. However, it would be nice * if we didn't have completely random interface names at boot too :-) It's * just a matter of making everybody's life easier. Since we are doing @@ -116,9 +110,6 @@ static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); #define EMAC_BOOT_LIST_SIZE 4 static struct device_node *emac_boot_list[EMAC_BOOT_LIST_SIZE]; -/* How long should I wait for dependent devices ? */ -#define EMAC_PROBE_DEP_TIMEOUT (HZ * 5) - /* I don't want to litter system log with timeout errors * when we have brain-damaged PHY. */ @@ -418,8 +409,8 @@ do_retry: static void emac_hash_mc(struct emac_instance *dev) { + u32 __iomem *gaht_base = emac_gaht_base(dev); const int regs = EMAC_XAHT_REGS(dev); - u32 *gaht_base = emac_gaht_base(dev); u32 gaht_temp[EMAC_XAHT_MAX_REGS]; struct netdev_hw_addr *ha; int i; @@ -973,8 +964,6 @@ static void __emac_set_multicast_list(struct emac_instance *dev) * we need is just to stop RX channel. This seems to work on all * tested SoCs. --ebs * - * If we need the full reset, we might just trigger the workqueue - * and do it async... 
a bit nasty but should work --BenH */ dev->mcast_pending = 0; emac_rx_disable(dev); @@ -1228,18 +1217,10 @@ static void emac_print_link_status(struct emac_instance *dev) static int emac_open(struct net_device *ndev) { struct emac_instance *dev = netdev_priv(ndev); - int err, i; + int i; DBG(dev, "open" NL); - /* Setup error IRQ handler */ - err = request_irq(dev->emac_irq, emac_irq, 0, "EMAC", dev); - if (err) { - printk(KERN_ERR "%s: failed to request IRQ %d\n", - ndev->name, dev->emac_irq); - return err; - } - /* Allocate RX ring */ for (i = 0; i < NUM_RX_BUFF; ++i) if (emac_alloc_rx_skb(dev, i)) { @@ -1293,8 +1274,6 @@ static int emac_open(struct net_device *ndev) return 0; oom: emac_clean_rx_ring(dev); - free_irq(dev->emac_irq, dev); - return -ENOMEM; } @@ -1408,8 +1387,6 @@ static int emac_close(struct net_device *ndev) emac_clean_tx_ring(dev); emac_clean_rx_ring(dev); - free_irq(dev->emac_irq, dev); - netif_carrier_off(ndev); return 0; @@ -2390,7 +2367,9 @@ static int emac_check_deps(struct emac_instance *dev, if (deps[i].drvdata != NULL) there++; } - return there == EMAC_DEP_COUNT; + if (there != EMAC_DEP_COUNT) + return -EPROBE_DEFER; + return 0; } static void emac_put_deps(struct emac_instance *dev) @@ -2402,19 +2381,6 @@ static void emac_put_deps(struct emac_instance *dev) platform_device_put(dev->tah_dev); } -static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action, - void *data) -{ - /* We are only intereted in device addition */ - if (action == BUS_NOTIFY_BOUND_DRIVER) - wake_up_all(&emac_probe_wait); - return 0; -} - -static struct notifier_block emac_of_bus_notifier = { - .notifier_call = emac_of_bus_notify -}; - static int emac_wait_deps(struct emac_instance *dev) { struct emac_depentry deps[EMAC_DEP_COUNT]; @@ -2431,18 +2397,13 @@ static int emac_wait_deps(struct emac_instance *dev) deps[EMAC_DEP_MDIO_IDX].phandle = dev->mdio_ph; if (dev->blist && dev->blist > emac_boot_list) deps[EMAC_DEP_PREV_IDX].phandle = 0xffffffffu; - bus_register_notifier(&platform_bus_type, &emac_of_bus_notifier); - wait_event_timeout(emac_probe_wait, - emac_check_deps(dev, deps), - EMAC_PROBE_DEP_TIMEOUT); - bus_unregister_notifier(&platform_bus_type, &emac_of_bus_notifier); - err = emac_check_deps(dev, deps) ? 
0 : -ENODEV; + err = emac_check_deps(dev, deps); for (i = 0; i < EMAC_DEP_COUNT; i++) { of_node_put(deps[i].node); if (err) platform_device_put(deps[i].ofdev); } - if (err == 0) { + if (!err) { dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; dev->zmii_dev = deps[EMAC_DEP_ZMII_IDX].ofdev; dev->rgmii_dev = deps[EMAC_DEP_RGMII_IDX].ofdev; @@ -2456,22 +2417,21 @@ static int emac_wait_deps(struct emac_instance *dev) static int emac_read_uint_prop(struct device_node *np, const char *name, u32 *val, int fatal) { - int len; - const u32 *prop = of_get_property(np, name, &len); - if (prop == NULL || len < sizeof(u32)) { + int err; + + err = of_property_read_u32(np, name, val); + if (err) { if (fatal) - printk(KERN_ERR "%pOF: missing %s property\n", - np, name); - return -ENODEV; + pr_err("%pOF: missing %s property", np, name); + return err; } - *val = *prop; return 0; } static void emac_adjust_link(struct net_device *ndev) { struct emac_instance *dev = netdev_priv(ndev); - struct phy_device *phy = dev->phy_dev; + struct phy_device *phy = ndev->phydev; dev->phy.autoneg = phy->autoneg; dev->phy.speed = phy->speed; @@ -2522,22 +2482,20 @@ static int emac_mdio_phy_start_aneg(struct mii_phy *phy, static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); phy->autoneg = AUTONEG_ENABLE; phy->advertising = advertise; - return emac_mdio_phy_start_aneg(phy, dev->phy_dev); + return emac_mdio_phy_start_aneg(phy, ndev->phydev); } static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); phy->autoneg = AUTONEG_DISABLE; phy->speed = speed; phy->duplex = fd; - return emac_mdio_phy_start_aneg(phy, dev->phy_dev); + return emac_mdio_phy_start_aneg(phy, ndev->phydev); } static int emac_mdio_poll_link(struct mii_phy *phy) @@ -2546,20 +2504,19 @@ static int emac_mdio_poll_link(struct mii_phy *phy) struct emac_instance *dev = netdev_priv(ndev); int res; - res = phy_read_status(dev->phy_dev); + res = phy_read_status(ndev->phydev); if (res) { dev_err(&dev->ofdev->dev, "link update failed (%d).", res); return ethtool_op_get_link(ndev); } - return dev->phy_dev->link; + return ndev->phydev->link; } static int emac_mdio_read_link(struct mii_phy *phy) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); - struct phy_device *phy_dev = dev->phy_dev; + struct phy_device *phy_dev = ndev->phydev; int res; res = phy_read_status(phy_dev); @@ -2576,10 +2533,9 @@ static int emac_mdio_read_link(struct mii_phy *phy) static int emac_mdio_init_phy(struct mii_phy *phy) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); - phy_start(dev->phy_dev); - return phy_init_hw(dev->phy_dev); + phy_start(ndev->phydev); + return phy_init_hw(ndev->phydev); } static const struct mii_phy_ops emac_dt_mdio_phy_ops = { @@ -2593,6 +2549,7 @@ static const struct mii_phy_ops emac_dt_mdio_phy_ops = { static int emac_dt_mdio_probe(struct emac_instance *dev) { struct device_node *mii_np; + struct mii_bus *bus; int res; mii_np = of_get_child_by_name(dev->ofdev->dev.of_node, "mdio"); @@ -2606,23 +2563,23 @@ static int emac_dt_mdio_probe(struct emac_instance *dev) goto put_node; } - dev->mii_bus = devm_mdiobus_alloc(&dev->ofdev->dev); - if (!dev->mii_bus) { + bus = devm_mdiobus_alloc(&dev->ofdev->dev); + if (!bus) { res = -ENOMEM; goto put_node; } - dev->mii_bus->priv = dev->ndev; - dev->mii_bus->parent = 
dev->ndev->dev.parent; - dev->mii_bus->name = "emac_mdio"; - dev->mii_bus->read = &emac_mii_bus_read; - dev->mii_bus->write = &emac_mii_bus_write; - dev->mii_bus->reset = &emac_mii_bus_reset; - snprintf(dev->mii_bus->id, MII_BUS_ID_SIZE, "%s", dev->ofdev->name); - res = of_mdiobus_register(dev->mii_bus, mii_np); + bus->priv = dev->ndev; + bus->parent = dev->ndev->dev.parent; + bus->name = "emac_mdio"; + bus->read = &emac_mii_bus_read; + bus->write = &emac_mii_bus_write; + bus->reset = &emac_mii_bus_reset; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev->ofdev->name); + res = devm_of_mdiobus_register(&dev->ofdev->dev, bus, mii_np); if (res) { dev_err(&dev->ofdev->dev, "cannot register MDIO bus %s (%d)", - dev->mii_bus->name, res); + bus->name, res); } put_node: @@ -2633,26 +2590,28 @@ static int emac_dt_mdio_probe(struct emac_instance *dev) static int emac_dt_phy_connect(struct emac_instance *dev, struct device_node *phy_handle) { + struct phy_device *phy_dev; + dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def), GFP_KERNEL); if (!dev->phy.def) return -ENOMEM; - dev->phy_dev = of_phy_connect(dev->ndev, phy_handle, &emac_adjust_link, - 0, dev->phy_mode); - if (!dev->phy_dev) { + phy_dev = of_phy_connect(dev->ndev, phy_handle, &emac_adjust_link, 0, + dev->phy_mode); + if (!phy_dev) { dev_err(&dev->ofdev->dev, "failed to connect to PHY.\n"); return -ENODEV; } - dev->phy.def->phy_id = dev->phy_dev->drv->phy_id; - dev->phy.def->phy_id_mask = dev->phy_dev->drv->phy_id_mask; - dev->phy.def->name = dev->phy_dev->drv->name; + dev->phy.def->phy_id = phy_dev->drv->phy_id; + dev->phy.def->phy_id_mask = phy_dev->drv->phy_id_mask; + dev->phy.def->name = phy_dev->drv->name; dev->phy.def->ops = &emac_dt_mdio_phy_ops; ethtool_convert_link_mode_to_legacy_u32(&dev->phy.features, - dev->phy_dev->supported); - dev->phy.address = dev->phy_dev->mdio.addr; - dev->phy.mode = dev->phy_dev->interface; + phy_dev->supported); + dev->phy.address = phy_dev->mdio.addr; + dev->phy.mode = phy_dev->interface; return 0; } @@ -2668,8 +2627,6 @@ static int emac_dt_phy_probe(struct emac_instance *dev) res = emac_dt_mdio_probe(dev); if (!res) { res = emac_dt_phy_connect(dev, phy_handle); - if (res) - mdiobus_unregister(dev->mii_bus); } } @@ -2708,13 +2665,11 @@ static int emac_init_phy(struct emac_instance *dev) return res; res = of_phy_register_fixed_link(np); - dev->phy_dev = of_phy_find_device(np); - if (res || !dev->phy_dev) { - mdiobus_unregister(dev->mii_bus); + ndev->phydev = of_phy_find_device(np); + if (res || !ndev->phydev) return res ? res : -EINVAL; - } emac_adjust_link(dev->ndev); - put_device(&dev->phy_dev->mdio.dev); + put_device(&ndev->phydev->mdio.dev); } return 0; } @@ -3053,7 +3008,7 @@ static int emac_probe(struct platform_device *ofdev) /* Allocate our net_device structure */ err = -ENOMEM; - ndev = alloc_etherdev(sizeof(struct emac_instance)); + ndev = devm_alloc_etherdev(&ofdev->dev, sizeof(struct emac_instance)); if (!ndev) goto err_gone; @@ -3072,35 +3027,40 @@ static int emac_probe(struct platform_device *ofdev) /* Init various config data based on device-tree */ err = emac_init_config(dev); if (err) - goto err_free; + goto err_gone; - /* Get interrupts. EMAC irq is mandatory, WOL irq is optional */ + /* Get interrupts. 
EMAC irq is mandatory */ dev->emac_irq = irq_of_parse_and_map(np, 0); - dev->wol_irq = irq_of_parse_and_map(np, 1); if (!dev->emac_irq) { printk(KERN_ERR "%pOF: Can't map main interrupt\n", np); err = -ENODEV; - goto err_free; + goto err_gone; + } + + /* Setup error IRQ handler */ + err = devm_request_irq(&ofdev->dev, dev->emac_irq, emac_irq, 0, "EMAC", + dev); + if (err) { + dev_err_probe(&ofdev->dev, err, "failed to request IRQ %d", + dev->emac_irq); + goto err_gone; } + ndev->irq = dev->emac_irq; /* Map EMAC regs */ // TODO : platform_get_resource() and devm_ioremap_resource() - dev->emacp = of_iomap(np, 0); - if (dev->emacp == NULL) { - printk(KERN_ERR "%pOF: Can't map device registers!\n", np); + dev->emacp = devm_of_iomap(&ofdev->dev, np, 0, NULL); + if (!dev->emacp) { + dev_err(&ofdev->dev, "can't map device registers"); err = -ENOMEM; - goto err_irq_unmap; + goto err_gone; } /* Wait for dependent devices */ err = emac_wait_deps(dev); - if (err) { - printk(KERN_ERR - "%pOF: Timeout waiting for dependent devices\n", np); - /* display more info about what's missing ? */ - goto err_reg_unmap; - } + if (err) + goto err_gone; dev->mal = platform_get_drvdata(dev->mal_dev); if (dev->mdio_dev != NULL) dev->mdio_instance = platform_get_drvdata(dev->mdio_dev); @@ -3187,7 +3147,7 @@ static int emac_probe(struct platform_device *ofdev) netif_carrier_off(ndev); - err = register_netdev(ndev); + err = devm_register_netdev(&ofdev->dev, ndev); if (err) { printk(KERN_ERR "%pOF: failed to register net device (%d)!\n", np, err); @@ -3200,10 +3160,6 @@ static int emac_probe(struct platform_device *ofdev) wmb(); platform_set_drvdata(ofdev, dev); - /* There's a new kid in town ! Let's tell everybody */ - wake_up_all(&emac_probe_wait); - - printk(KERN_INFO "%s: EMAC-%d %pOF, MAC %pM\n", ndev->name, dev->cell_index, np, ndev->dev_addr); @@ -3232,24 +3188,9 @@ static int emac_probe(struct platform_device *ofdev) mal_unregister_commac(dev->mal, &dev->commac); err_rel_deps: emac_put_deps(dev); - err_reg_unmap: - iounmap(dev->emacp); - err_irq_unmap: - if (dev->wol_irq) - irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq) - irq_dispose_mapping(dev->emac_irq); - err_free: - free_netdev(ndev); err_gone: - /* if we were on the bootlist, remove us as we won't show up and - * wake up all waiters to notify them in case they were waiting - * on us - */ - if (blist) { + if (blist) *blist = NULL; - wake_up_all(&emac_probe_wait); - } return err; } @@ -3259,8 +3200,6 @@ static void emac_remove(struct platform_device *ofdev) DBG(dev, "remove" NL); - unregister_netdev(dev->ndev); - cancel_work_sync(&dev->reset_work); if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) @@ -3270,26 +3209,11 @@ static void emac_remove(struct platform_device *ofdev) if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) zmii_detach(dev->zmii_dev, dev->zmii_port); - if (dev->phy_dev) - phy_disconnect(dev->phy_dev); - - if (dev->mii_bus) - mdiobus_unregister(dev->mii_bus); - busy_phy_map &= ~(1 << dev->phy.address); DBG(dev, "busy_phy_map now %#x" NL, busy_phy_map); mal_unregister_commac(dev->mal, &dev->commac); emac_put_deps(dev); - - iounmap(dev->emacp); - - if (dev->wol_irq) - irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq) - irq_dispose_mapping(dev->emac_irq); - - free_netdev(dev->ndev); } /* XXX Features in here should be replaced by properties... 
*/ @@ -3328,16 +3252,15 @@ static void __init emac_make_bootlist(void) /* Collect EMACs */ while((np = of_find_all_nodes(np)) != NULL) { - const u32 *idx; + u32 idx; if (of_match_node(emac_match, np) == NULL) continue; if (of_property_read_bool(np, "unused")) continue; - idx = of_get_property(np, "cell-index", NULL); - if (idx == NULL) + if (of_property_read_u32(np, "cell-index", &idx)) continue; - cell_indices[i] = *idx; + cell_indices[i] = idx; emac_boot_list[i++] = of_node_get(np); if (i >= EMAC_BOOT_LIST_SIZE) { of_node_put(np); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 295516b07662..89fa1683ec3c 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -188,10 +188,6 @@ struct emac_instance { struct emac_instance *mdio_instance; struct mutex mdio_lock; - /* Device-tree based phy configuration */ - struct mii_bus *mii_bus; - struct phy_device *phy_dev; - /* ZMII infos if any */ u32 zmii_ph; u32 zmii_port; @@ -400,7 +396,7 @@ static inline int emac_has_feature(struct emac_instance *dev, ((u32)(1 << (EMAC_XAHT_WIDTH(dev) - 1)) >> \ ((slot) & (u32)(EMAC_XAHT_WIDTH(dev) - 1))) -static inline u32 *emac_xaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_xaht_base(struct emac_instance *dev) { struct emac_regs __iomem *p = dev->emacp; int offset; @@ -413,10 +409,10 @@ static inline u32 *emac_xaht_base(struct emac_instance *dev) else offset = offsetof(struct emac_regs, u0.emac4.iaht1); - return (u32 *)((ptrdiff_t)p + offset); + return (u32 __iomem *)((__force ptrdiff_t)p + offset); } -static inline u32 *emac_gaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_gaht_base(struct emac_instance *dev) { /* GAHT registers always come after an identical number of * IAHT registers. 
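The core.h hunk above adds __iomem annotations so that sparse can catch plain C dereferences of the memory-mapped hash-table registers, confining the __force cast to a single helper. A minimal sketch of that pattern follows; the demo_* names and register layout are hypothetical stand-ins (not the real EMAC block), and generic iowrite32() accessors are assumed rather than the powerpc out_be32() the driver actually uses:

#include <linux/io.h>
#include <linux/stddef.h>
#include <linux/types.h>

#define DEMO_XAHT_REGS 4

/* Illustrative register block only. */
struct demo_regs {
        u32 ctrl;
        u32 iaht[DEMO_XAHT_REGS];       /* individual address hash table */
        u32 gaht[DEMO_XAHT_REGS];       /* group address hash table */
};

/* Pointer arithmetic keeps the __iomem qualifier; __force is limited
 * to the cast, mirroring the emac_xaht_base() change above.
 */
static inline u32 __iomem *demo_gaht_base(struct demo_regs __iomem *p)
{
        return (u32 __iomem *)((__force uintptr_t)p +
                               offsetof(struct demo_regs, gaht));
}

static void demo_clear_gaht(struct demo_regs __iomem *p)
{
        u32 __iomem *gaht = demo_gaht_base(p);
        int i;

        /* MMIO goes through accessors, never a plain '*gaht = 0'. */
        for (i = 0; i < DEMO_XAHT_REGS; i++)
                iowrite32(0, gaht + i);
}

With the qualifier carried through the helper's return type, a direct dereference anywhere downstream becomes a sparse warning instead of a silent bug.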
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 4c9d9badd698..b619a3ec245b 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -39,7 +39,8 @@ #include "ibmveth.h" static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); +static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, + bool reuse); static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; @@ -226,6 +227,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, for (i = 0; i < count; ++i) { union ibmveth_buf_desc desc; + free_index = pool->consumer_index; + index = pool->free_map[free_index]; + skb = NULL; + + BUG_ON(index == IBM_VETH_INVALID_MAP); + + /* are we allocating a new buffer or recycling an old one */ + if (pool->skbuff[index]) + goto reuse; + skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); if (!skb) { @@ -235,46 +246,46 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, break; } - free_index = pool->consumer_index; - pool->consumer_index++; - if (pool->consumer_index >= pool->size) - pool->consumer_index = 0; - index = pool->free_map[free_index]; - - BUG_ON(index == IBM_VETH_INVALID_MAP); - BUG_ON(pool->skbuff[index] != NULL); - dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto failure; - pool->free_map[free_index] = IBM_VETH_INVALID_MAP; pool->dma_addr[index] = dma_addr; pool->skbuff[index] = skb; - correlator = ((u64)pool->index << 32) | index; - *(u64 *)skb->data = correlator; - - desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size; - desc.fields.address = dma_addr; - if (rx_flush) { unsigned int len = min(pool->buff_size, - adapter->netdev->mtu + - IBMVETH_BUFF_OH); + adapter->netdev->mtu + + IBMVETH_BUFF_OH); ibmveth_flush_buffer(skb->data, len); } +reuse: + dma_addr = pool->dma_addr[index]; + desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size; + desc.fields.address = dma_addr; + + correlator = ((u64)pool->index << 32) | index; + *(u64 *)pool->skbuff[index]->data = correlator; + lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); if (lpar_rc != H_SUCCESS) { + netdev_warn(adapter->netdev, + "%sadd_logical_lan failed %lu\n", + skb ? 
"" : "When recycling: ", lpar_rc); goto failure; - } else { - buffers_added++; - adapter->replenish_add_buff_success++; } + + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; + pool->consumer_index++; + if (pool->consumer_index >= pool->size) + pool->consumer_index = 0; + + buffers_added++; + adapter->replenish_add_buff_success++; } mb(); @@ -282,17 +293,13 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, return; failure: - pool->free_map[free_index] = index; - pool->skbuff[index] = NULL; - if (pool->consumer_index == 0) - pool->consumer_index = pool->size - 1; - else - pool->consumer_index--; - if (!dma_mapping_error(&adapter->vdev->dev, dma_addr)) + + if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr)) dma_unmap_single(&adapter->vdev->dev, pool->dma_addr[index], pool->buff_size, DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); + dev_kfree_skb_any(pool->skbuff[index]); + pool->skbuff[index] = NULL; adapter->replenish_add_buff_failure++; mb(); @@ -365,7 +372,7 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, /* remove a buffer from a pool */ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, - u64 correlator) + u64 correlator, bool reuse) { unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; @@ -376,15 +383,23 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, BUG_ON(index >= adapter->rx_buff_pool[pool].size); skb = adapter->rx_buff_pool[pool].skbuff[index]; - BUG_ON(skb == NULL); - adapter->rx_buff_pool[pool].skbuff[index] = NULL; + /* if we are going to reuse the buffer then keep the pointers around + * but mark index as available. replenish will see the skb pointer and + * assume it is to be recycled. + */ + if (!reuse) { + /* remove the skb pointer to mark free. 
actual freeing is done + * by upper level networking after gro_receive + */ + adapter->rx_buff_pool[pool].skbuff[index] = NULL; - dma_unmap_single(&adapter->vdev->dev, - adapter->rx_buff_pool[pool].dma_addr[index], - adapter->rx_buff_pool[pool].buff_size, - DMA_FROM_DEVICE); + dma_unmap_single(&adapter->vdev->dev, + adapter->rx_buff_pool[pool].dma_addr[index], + adapter->rx_buff_pool[pool].buff_size, + DMA_FROM_DEVICE); + } free_index = adapter->rx_buff_pool[pool].producer_index; adapter->rx_buff_pool[pool].producer_index++; @@ -411,51 +426,13 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada return adapter->rx_buff_pool[pool].skbuff[index]; } -/* recycle the current buffer on the rx queue */ -static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) +static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, + bool reuse) { - u32 q_index = adapter->rx_queue.index; - u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator; - unsigned int pool = correlator >> 32; - unsigned int index = correlator & 0xffffffffUL; - union ibmveth_buf_desc desc; - unsigned long lpar_rc; - int ret = 1; - - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); - - if (!adapter->rx_buff_pool[pool].active) { - ibmveth_rxq_harvest_buffer(adapter); - ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]); - goto out; - } - - desc.fields.flags_len = IBMVETH_BUF_VALID | - adapter->rx_buff_pool[pool].buff_size; - desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index]; - - lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); - - if (lpar_rc != H_SUCCESS) { - netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed " "during recycle rc=%ld", lpar_rc); - ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); - ret = 0; - } - - if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { - adapter->rx_queue.index = 0; - adapter->rx_queue.toggle = !adapter->rx_queue.toggle; - } - -out: - return ret; -} + u64 cor; -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) -{ - ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); + cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; + ibmveth_remove_buffer_from_pool(adapter, cor, reuse); if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; @@ -1337,6 +1314,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) unsigned long lpar_rc; u16 mss = 0; +restart_poll: while (frames_processed < budget) { if (!ibmveth_rxq_pending_buffer(adapter)) break; @@ -1346,7 +1324,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) wmb(); /* suggested by larson1 */ adapter->rx_invalid_buffer++; netdev_dbg(netdev, "recycling invalid buffer\n"); - ibmveth_rxq_recycle_buffer(adapter); + ibmveth_rxq_harvest_buffer(adapter, true); } else { struct sk_buff *skb, *new_skb; int length = ibmveth_rxq_frame_length(adapter); @@ -1379,11 +1357,10 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) if (rx_flush) ibmveth_flush_buffer(skb->data, length + offset); - if (!ibmveth_rxq_recycle_buffer(adapter)) - kfree_skb(skb); + ibmveth_rxq_harvest_buffer(adapter, true); skb = new_skb; } else { - ibmveth_rxq_harvest_buffer(adapter); + ibmveth_rxq_harvest_buffer(adapter, false); skb_reserve(skb, offset); } @@ -1420,24 +1397,25 @@ static int
ibmveth_poll(struct napi_struct *napi, int budget) ibmveth_replenish_task(adapter); - if (frames_processed < budget) { - napi_complete_done(napi, frames_processed); + if (frames_processed == budget) + goto out; - /* We think we are done - reenable interrupts, - * then check once more to make sure we are done. - */ - lpar_rc = h_vio_signal(adapter->vdev->unit_address, - VIO_IRQ_ENABLE); + if (!napi_complete_done(napi, frames_processed)) + goto out; - BUG_ON(lpar_rc != H_SUCCESS); + /* We think we are done - reenable interrupts, + * then check once more to make sure we are done. + */ + lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE); + BUG_ON(lpar_rc != H_SUCCESS); - if (ibmveth_rxq_pending_buffer(adapter) && - napi_schedule(napi)) { - lpar_rc = h_vio_signal(adapter->vdev->unit_address, - VIO_IRQ_DISABLE); - } + if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + goto restart_poll; } +out: return frames_processed; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 23ebeb143987..87e693a81433 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -117,6 +117,7 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb); static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); static void flush_reset_queue(struct ibmvnic_adapter *adapter); +static void print_subcrq_error(struct device *dev, int rc, const char *func); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -2140,63 +2141,49 @@ static int ibmvnic_close(struct net_device *netdev) } /** - * build_hdr_data - creates L2/L3/L4 header data buffer + * get_hdr_lens - fills list of L2/L3/L4 hdr lens * @hdr_field: bitfield determining needed headers * @skb: socket buffer - * @hdr_len: array of header lengths - * @hdr_data: buffer to write the header to + * @hdr_len: array of header lengths to be filled * * Reads hdr_field to determine which headers are needed by firmware. * Builds a buffer containing these headers. Saves individual header * lengths and total buffer length to be used to build descriptors. 
+ * + * Return: total len of all headers */ -static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, - int *hdr_len, u8 *hdr_data) +static int get_hdr_lens(u8 hdr_field, struct sk_buff *skb, + int *hdr_len) { int len = 0; - u8 *hdr; - if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb)) - hdr_len[0] = sizeof(struct vlan_ethhdr); - else - hdr_len[0] = sizeof(struct ethhdr); + + if ((hdr_field >> 6) & 1) { + hdr_len[0] = skb_mac_header_len(skb); + len += hdr_len[0]; + } + + if ((hdr_field >> 5) & 1) { + hdr_len[1] = skb_network_header_len(skb); + len += hdr_len[1]; + } + + if (!((hdr_field >> 4) & 1)) + return len; if (skb->protocol == htons(ETH_P_IP)) { - hdr_len[1] = ip_hdr(skb)->ihl * 4; if (ip_hdr(skb)->protocol == IPPROTO_TCP) hdr_len[2] = tcp_hdrlen(skb); else if (ip_hdr(skb)->protocol == IPPROTO_UDP) hdr_len[2] = sizeof(struct udphdr); } else if (skb->protocol == htons(ETH_P_IPV6)) { - hdr_len[1] = sizeof(struct ipv6hdr); if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) hdr_len[2] = tcp_hdrlen(skb); else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) hdr_len[2] = sizeof(struct udphdr); - } else if (skb->protocol == htons(ETH_P_ARP)) { - hdr_len[1] = arp_hdr_len(skb->dev); - hdr_len[2] = 0; } - memset(hdr_data, 0, 120); - if ((hdr_field >> 6) & 1) { - hdr = skb_mac_header(skb); - memcpy(hdr_data, hdr, hdr_len[0]); - len += hdr_len[0]; - } - - if ((hdr_field >> 5) & 1) { - hdr = skb_network_header(skb); - memcpy(hdr_data + len, hdr, hdr_len[1]); - len += hdr_len[1]; - } - - if ((hdr_field >> 4) & 1) { - hdr = skb_transport_header(skb); - memcpy(hdr_data + len, hdr, hdr_len[2]); - len += hdr_len[2]; - } - return len; + return len + hdr_len[2]; } /** @@ -2209,12 +2196,14 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, * * Creates header and, if needed, header extension descriptors and * places them in a descriptor array, scrq_arr + * + * Return: Number of header descs */ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, union sub_crq *scrq_arr) { - union sub_crq hdr_desc; + union sub_crq *hdr_desc; int tmp_len = len; int num_descs = 0; u8 *data, *cur; @@ -2223,28 +2212,26 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, while (tmp_len > 0) { cur = hdr_data + len - tmp_len; - memset(&hdr_desc, 0, sizeof(hdr_desc)); - if (cur != hdr_data) { - data = hdr_desc.hdr_ext.data; + hdr_desc = &scrq_arr[num_descs]; + if (num_descs) { + data = hdr_desc->hdr_ext.data; tmp = tmp_len > 29 ? 29 : tmp_len; - hdr_desc.hdr_ext.first = IBMVNIC_CRQ_CMD; - hdr_desc.hdr_ext.type = IBMVNIC_HDR_EXT_DESC; - hdr_desc.hdr_ext.len = tmp; + hdr_desc->hdr_ext.first = IBMVNIC_CRQ_CMD; + hdr_desc->hdr_ext.type = IBMVNIC_HDR_EXT_DESC; + hdr_desc->hdr_ext.len = tmp; } else { - data = hdr_desc.hdr.data; + data = hdr_desc->hdr.data; tmp = tmp_len > 24 ? 
24 : tmp_len; - hdr_desc.hdr.first = IBMVNIC_CRQ_CMD; - hdr_desc.hdr.type = IBMVNIC_HDR_DESC; - hdr_desc.hdr.len = tmp; - hdr_desc.hdr.l2_len = (u8)hdr_len[0]; - hdr_desc.hdr.l3_len = cpu_to_be16((u16)hdr_len[1]); - hdr_desc.hdr.l4_len = (u8)hdr_len[2]; - hdr_desc.hdr.flag = hdr_field << 1; + hdr_desc->hdr.first = IBMVNIC_CRQ_CMD; + hdr_desc->hdr.type = IBMVNIC_HDR_DESC; + hdr_desc->hdr.len = tmp; + hdr_desc->hdr.l2_len = (u8)hdr_len[0]; + hdr_desc->hdr.l3_len = cpu_to_be16((u16)hdr_len[1]); + hdr_desc->hdr.l4_len = (u8)hdr_len[2]; + hdr_desc->hdr.flag = hdr_field << 1; } memcpy(data, cur, tmp); tmp_len -= tmp; - *scrq_arr = hdr_desc; - scrq_arr++; num_descs++; } @@ -2267,13 +2254,11 @@ static void build_hdr_descs_arr(struct sk_buff *skb, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; - u8 hdr_data[140] = {0}; int tot_len; - tot_len = build_hdr_data(hdr_field, skb, hdr_len, - hdr_data); - *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, - indir_arr + 1); + tot_len = get_hdr_lens(hdr_field, skb, hdr_len); + *num_entries += create_hdr_descs(hdr_field, skb_mac_header(skb), + tot_len, hdr_len, indir_arr + 1); } static int ibmvnic_xmit_workarounds(struct sk_buff *skb, @@ -2350,8 +2335,29 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, } } +static int send_subcrq_direct(struct ibmvnic_adapter *adapter, + u64 remote_handle, u64 *entry) +{ + unsigned int ua = adapter->vdev->unit_address; + struct device *dev = &adapter->vdev->dev; + int rc; + + /* Make sure the hypervisor sees the complete request */ + dma_wmb(); + rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua, + cpu_to_be64(remote_handle), + cpu_to_be64(entry[0]), cpu_to_be64(entry[1]), + cpu_to_be64(entry[2]), cpu_to_be64(entry[3])); + + if (rc) + print_subcrq_error(dev, rc, __func__); + + return rc; +} + static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, - struct ibmvnic_sub_crq_queue *tx_scrq) + struct ibmvnic_sub_crq_queue *tx_scrq, + bool indirect) { struct ibmvnic_ind_xmit_queue *ind_bufp; u64 dma_addr; @@ -2366,7 +2372,13 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, if (!entries) return 0; - rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + + if (indirect) + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + else + rc = send_subcrq_direct(adapter, handle, + (u64 *)ind_bufp->indir_arr); + if (rc) ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); else @@ -2397,6 +2409,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned long lpar_rc; union sub_crq tx_crq; unsigned int offset; + bool use_scrq_send_direct = false; int num_entries = 1; unsigned char *dst; int bufidx = 0; @@ -2424,7 +2437,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_dropped++; tx_send_failed++; ret = NETDEV_TX_OK; - lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_err; goto out; @@ -2442,7 +2455,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; - lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_err; goto out; @@ -2456,6 +2469,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) memset(dst, 0, tx_pool->buf_size); data_dma_addr = ltb->addr + offset; + /* if we are 
going to send_subcrq_direct this then we need to + * update the checksum before copying the data into ltb. Essentially + * these packets force disable CSO so that we can guarantee that + * FW does not need header info and we can send direct. + */ + if (!skb_is_gso(skb) && !ind_bufp->index && !netdev_xmit_more()) { + use_scrq_send_direct = true; + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + use_scrq_send_direct = false; + } + if (skb_shinfo(skb)->nr_frags) { int cur, i; @@ -2475,9 +2500,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) skb_copy_from_linear_data(skb, dst, skb->len); } - /* post changes to long_term_buff *dst before VIOS accessing it */ - dma_wmb(); - tx_pool->consumer_index = (tx_pool->consumer_index + 1) % tx_pool->num_buffers; @@ -2540,6 +2562,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.flags1 |= IBMVNIC_TX_LSO; tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); hdrs += 2; + } else if (use_scrq_send_direct) { + /* See above comment, CSO disabled with direct xmit */ + tx_crq.v1.flags1 &= ~(IBMVNIC_TX_CHKSUM_OFFLOAD); + ind_bufp->index = 1; + tx_buff->num_entries = 1; + netdev_tx_sent_queue(txq, skb->len); + ind_bufp->indir_arr[0] = tx_crq; + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, false); + if (lpar_rc != H_SUCCESS) + goto tx_err; + + goto early_exit; } if ((*hdrs >> 7) & 1) @@ -2549,7 +2583,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_buff->num_entries = num_entries; /* flush buffer if current entry can not fit */ if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { - lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_flush_err; } @@ -2557,15 +2591,17 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) indir_arr[0] = tx_crq; memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], num_entries * sizeof(struct ibmvnic_generic_scrq)); + ind_bufp->index += num_entries; if (__netdev_tx_sent_queue(txq, skb->len, netdev_xmit_more() && ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { - lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_err; } +early_exit: if (atomic_add_return(num_entries, &tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { netdev_dbg(netdev, "Stopping queue %d\n", queue_num); @@ -3527,9 +3563,8 @@ restart_poll: } if (adapter->state != VNIC_CLOSING && - ((atomic_read(&adapter->rx_pool[scrq_num].available) < - adapter->req_rx_add_entries_per_subcrq / 2) || - frames_processed < budget)) + (atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2)) replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); if (frames_processed < budget) { if (napi_complete_done(napi, frames_processed)) { @@ -4169,20 +4204,17 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { struct device *dev = &adapter->vdev->dev; + int num_packets = 0, total_bytes = 0; struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_buff *txbuff; struct netdev_queue *txq; union sub_crq *next; - int index; - int i; + int index, i; restart_loop: while (pending_scrq(adapter, scrq)) { unsigned int pool = scrq->pool_index; int num_entries = 0; - int total_bytes = 0; - int num_packets = 0; - next = ibmvnic_next_scrq(adapter, scrq); 
for (i = 0; i < next->tx_comp.num_comps; i++) { index = be32_to_cpu(next->tx_comp.correlators[i]); @@ -4218,8 +4250,6 @@ restart_loop: /* remove tx_comp scrq*/ next->tx_comp.first = 0; - txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); - netdev_tx_completed_queue(txq, num_packets, total_bytes); if (atomic_sub_return(num_entries, &scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && @@ -4244,6 +4274,9 @@ restart_loop: goto restart_loop; } + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + return 0; } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 360ee26557f7..f103249b12fa 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6671,8 +6671,10 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (adapter->flags2 & FLAG2_HAS_PHY_WAKEUP) { /* enable wakeup by the PHY */ retval = e1000_init_phy_wakeup(adapter, wufc); - if (retval) - return retval; + if (retval) { + e_err("Failed to enable wakeup\n"); + goto skip_phy_configurations; + } } else { /* enable wakeup by the MAC */ ew32(WUFC, wufc); @@ -6693,8 +6695,10 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) * or broadcast. */ retval = e1000_enable_ulp_lpt_lp(hw, !runtime); - if (retval) - return retval; + if (retval) { + e_err("Failed to enable ULP\n"); + goto skip_phy_configurations; + } } } @@ -6726,6 +6730,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) hw->phy.ops.release(hw); } +skip_phy_configurations: /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ @@ -6968,15 +6973,13 @@ static int e1000e_pm_suspend(struct device *dev) e1000e_pm_freeze(dev); rc = __e1000_shutdown(pdev, false); - if (rc) { - e1000e_pm_thaw(dev); - } else { + if (!rc) { /* Introduce S0ix implementation */ if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) e1000e_s0ix_entry_flow(adapter); } - return rc; + return 0; } static int e1000e_pm_resume(struct device *dev) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d546567e0286..2089a0e172bf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -4,6 +4,7 @@ #ifndef _I40E_H_ #define _I40E_H_ +#include <linux/linkmode.h> #include <linux/pci.h> #include <linux/ptp_clock_kernel.h> #include <linux/types.h> diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 1d0d2e526adb..f2506511bbff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2555,16 +2555,12 @@ static int i40e_get_ts_info(struct net_device *dev, return ethtool_op_get_ts_info(dev, info); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (pf->ptp_clock) info->phc_index = ptp_clock_index(pf->ptp_clock); - else - info->phc_index = -1; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); @@ -5641,6 +5637,26 @@ static int i40e_get_module_eeprom(struct net_device *netdev, return 0; } +static void i40e_eee_capability_to_kedata_supported(__le16 eee_capability_, + unsigned long *supported) +{ + const int eee_capability = le16_to_cpu(eee_capability_); + static 
const int lut[] = { + ETHTOOL_LINK_MODE_100baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + }; + + linkmode_zero(supported); + for (unsigned int i = ARRAY_SIZE(lut); i--; ) + if (eee_capability & BIT(i + 1)) + linkmode_set_bit(lut[i], supported); +} + static int i40e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct i40e_netdev_priv *np = netdev_priv(netdev); @@ -5648,7 +5664,7 @@ static int i40e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - int status = 0; + int status; /* Get initial PHY capabilities */ status = i40e_aq_get_phy_capabilities(hw, false, true, &phy_cfg, NULL); @@ -5661,11 +5677,18 @@ static int i40e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) if (phy_cfg.eee_capability == 0) return -EOPNOTSUPP; + i40e_eee_capability_to_kedata_supported(phy_cfg.eee_capability, + edata->supported); + linkmode_copy(edata->lp_advertised, edata->supported); + /* Get current configuration */ status = i40e_aq_get_phy_capabilities(hw, false, false, &phy_cfg, NULL); if (status) return -EAGAIN; + linkmode_zero(edata->advertised); + if (phy_cfg.eee_capability) + linkmode_copy(edata->advertised, edata->supported); edata->eee_enabled = !!phy_cfg.eee_capability; edata->tx_lpi_enabled = pf->stats.tx_lpi_status; @@ -5681,10 +5704,11 @@ static int i40e_is_eee_param_supported(struct net_device *netdev, struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ethtool_not_used { - u32 value; + bool value; const char *name; } param[] = { - {edata->tx_lpi_timer, "tx-timer"}, + {!!(edata->advertised[0] & ~edata->supported[0]), "advertise"}, + {!!edata->tx_lpi_timer, "tx-timer"}, {edata->tx_lpi_enabled != pf->stats.tx_lpi_status, "tx-lpi"} }; int i; @@ -5710,7 +5734,7 @@ static int i40e_set_eee(struct net_device *netdev, struct ethtool_keee *edata) struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; __le16 eee_capability; - int status = 0; + int status; /* Deny parameters we don't support */ if (i40e_is_eee_param_supported(netdev, edata)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index cbcfada7b357..03205eb9f925 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7264,6 +7264,26 @@ out: } #endif /* CONFIG_I40E_DCB */ +static void i40e_print_link_message_eee(struct i40e_vsi *vsi, + const char *speed, const char *fc) +{ + struct ethtool_keee kedata; + + memzero_explicit(&kedata, sizeof(kedata)); + if (vsi->netdev->ethtool_ops->get_eee) + vsi->netdev->ethtool_ops->get_eee(vsi->netdev, &kedata); + + if (!linkmode_empty(kedata.supported)) + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Flow Control: %s, EEE: %s\n", + speed, fc, + kedata.eee_enabled ? 
"Enabled" : "Disabled"); + else + netdev_info(vsi->netdev, + "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n", + speed, fc); +} + /** * i40e_print_link_message - print link up or down * @vsi: the VSI for which link needs a message @@ -7395,9 +7415,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", speed, req_fec, fec, an, fc); } else { - netdev_info(vsi->netdev, - "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n", - speed, fc); + i40e_print_link_message_eee(vsi, speed, fc); } } diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 23a6557fc3db..48cd1d06761c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -33,6 +33,7 @@ #include <net/udp.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_skbedit.h> #include "iavf_type.h" #include <linux/avf/virtchnl.h> @@ -393,6 +394,8 @@ struct iavf_adapter { VIRTCHNL_VF_OFFLOAD_VLAN_V2) #define CRC_OFFLOAD_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \ VIRTCHNL_VF_OFFLOAD_CRC) +#define TC_U32_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \ + VIRTCHNL_VF_OFFLOAD_TC_U32) #define VLAN_V2_FILTERING_ALLOWED(_a) \ (VLAN_V2_ALLOWED((_a)) && \ ((_a)->vlan_v2_caps.filtering.filtering_support.outer || \ @@ -437,6 +440,7 @@ struct iavf_adapter { #define IAVF_MAX_FDIR_FILTERS 128 /* max allowed Flow Director filters */ u16 fdir_active_fltr; + u16 raw_fdir_active_fltr; struct list_head fdir_list_head; spinlock_t fdir_fltr_lock; /* protect the Flow Director filter list */ @@ -444,6 +448,32 @@ struct iavf_adapter { spinlock_t adv_rss_lock; /* protect the RSS management list */ }; +/* Must be called with fdir_fltr_lock lock held */ +static inline bool iavf_fdir_max_reached(struct iavf_adapter *adapter) +{ + return adapter->fdir_active_fltr + adapter->raw_fdir_active_fltr >= + IAVF_MAX_FDIR_FILTERS; +} + +static inline void +iavf_inc_fdir_active_fltr(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr) +{ + if (iavf_is_raw_fdir(fltr)) + adapter->raw_fdir_active_fltr++; + else + adapter->fdir_active_fltr++; +} + +static inline void +iavf_dec_fdir_active_fltr(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr) +{ + if (iavf_is_raw_fdir(fltr)) + adapter->raw_fdir_active_fltr--; + else + adapter->fdir_active_fltr--; +} /* Ethtool Private Flags */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 52273f7eab2c..74a1e9fe1821 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -927,7 +927,7 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter, spin_lock_bh(&adapter->fdir_fltr_lock); - rule = iavf_find_fdir_fltr_by_loc(adapter, fsp->location); + rule = iavf_find_fdir_fltr(adapter, false, fsp->location); if (!rule) { ret = -EINVAL; goto release_lock; @@ -1072,6 +1072,9 @@ iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd, spin_lock_bh(&adapter->fdir_fltr_lock); list_for_each_entry(fltr, &adapter->fdir_list_head, list) { + if (iavf_is_raw_fdir(fltr)) + continue; + if (cnt == cmd->rule_cnt) { val = -EMSGSIZE; goto release_lock; @@ -1263,15 +1266,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx return -EINVAL; spin_lock_bh(&adapter->fdir_fltr_lock); - if (adapter->fdir_active_fltr >= 
IAVF_MAX_FDIR_FILTERS) { - spin_unlock_bh(&adapter->fdir_fltr_lock); - dev_err(&adapter->pdev->dev, - "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n", - IAVF_MAX_FDIR_FILTERS); - return -ENOSPC; - } - - if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) { + if (iavf_find_fdir_fltr(adapter, false, fsp->location)) { dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n"); spin_unlock_bh(&adapter->fdir_fltr_lock); return -EEXIST; @@ -1291,23 +1286,10 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx } err = iavf_add_fdir_fltr_info(adapter, fsp, fltr); - if (err) - goto ret; - - spin_lock_bh(&adapter->fdir_fltr_lock); - iavf_fdir_list_add_fltr(adapter, fltr); - adapter->fdir_active_fltr++; - - if (adapter->link_up) - fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; - else - fltr->state = IAVF_FDIR_FLTR_INACTIVE; - spin_unlock_bh(&adapter->fdir_fltr_lock); + if (!err) + err = iavf_fdir_add_fltr(adapter, fltr); - if (adapter->link_up) - iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_FDIR_FILTER); -ret: - if (err && fltr) + if (err) kfree(fltr); mutex_unlock(&adapter->crit_lock); @@ -1324,34 +1306,11 @@ ret: static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd) { struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; - struct iavf_fdir_fltr *fltr = NULL; - int err = 0; if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; - spin_lock_bh(&adapter->fdir_fltr_lock); - fltr = iavf_find_fdir_fltr_by_loc(adapter, fsp->location); - if (fltr) { - if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) { - fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST; - } else if (fltr->state == IAVF_FDIR_FLTR_INACTIVE) { - list_del(&fltr->list); - kfree(fltr); - adapter->fdir_active_fltr--; - fltr = NULL; - } else { - err = -EBUSY; - } - } else if (adapter->fdir_active_fltr) { - err = -EINVAL; - } - spin_unlock_bh(&adapter->fdir_fltr_lock); - - if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST) - iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_FDIR_FILTER); - - return err; + return iavf_fdir_del_fltr(adapter, false, fsp->location); } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c index 2d47b0b4640e..a1b3b44cc14a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c @@ -796,6 +796,9 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr * spin_lock_bh(&adapter->fdir_fltr_lock); list_for_each_entry(tmp, &adapter->fdir_list_head, list) { + if (iavf_is_raw_fdir(fltr)) + continue; + if (tmp->flow_type != fltr->flow_type) continue; @@ -815,33 +818,52 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr * } /** - * iavf_find_fdir_fltr_by_loc - find filter with location + * iavf_find_fdir_fltr - find FDIR filter * @adapter: pointer to the VF adapter structure - * @loc: location to find. + * @is_raw: filter type, is raw (tc u32) or not (ethtool) + * @data: data to ID the filter, type dependent * - * Returns pointer to Flow Director filter if found or null + * Returns: pointer to Flow Director filter if found or NULL. Lock must be held. 
*/ -struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc) +struct iavf_fdir_fltr *iavf_find_fdir_fltr(struct iavf_adapter *adapter, + bool is_raw, u32 data) { struct iavf_fdir_fltr *rule; - list_for_each_entry(rule, &adapter->fdir_list_head, list) - if (rule->loc == loc) + list_for_each_entry(rule, &adapter->fdir_list_head, list) { + if ((is_raw && rule->cls_u32_handle == data) || + (!is_raw && rule->loc == data)) return rule; + } return NULL; } /** - * iavf_fdir_list_add_fltr - add a new node to the flow director filter list + * iavf_fdir_add_fltr - add a new node to the flow director filter list * @adapter: pointer to the VF adapter structure * @fltr: filter node to add to structure + * + * Return: 0 on success or negative errno on failure. */ -void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr) +int iavf_fdir_add_fltr(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr) { struct iavf_fdir_fltr *rule, *parent = NULL; + spin_lock_bh(&adapter->fdir_fltr_lock); + if (iavf_fdir_max_reached(adapter)) { + spin_unlock_bh(&adapter->fdir_fltr_lock); + dev_err(&adapter->pdev->dev, + "Unable to add Flow Director filter (limit (%u) reached)\n", + IAVF_MAX_FDIR_FILTERS); + return -ENOSPC; + } + list_for_each_entry(rule, &adapter->fdir_list_head, list) { + if (iavf_is_raw_fdir(fltr)) + break; + if (rule->loc >= fltr->loc) break; parent = rule; @@ -851,4 +873,55 @@ void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr list_add(&fltr->list, &parent->list); else list_add(&fltr->list, &adapter->fdir_list_head); + + iavf_inc_fdir_active_fltr(adapter, fltr); + + if (adapter->link_up) + fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; + else + fltr->state = IAVF_FDIR_FLTR_INACTIVE; + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (adapter->link_up) + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_FDIR_FILTER); + + return 0; +} + +/** + * iavf_fdir_del_fltr - delete a flow director filter from the list + * @adapter: pointer to the VF adapter structure + * @is_raw: filter type, is raw (tc u32) or not (ethtool) + * @data: data to ID the filter, type dependent + * + * Return: 0 on success or negative errno on failure. 
+ */ +int iavf_fdir_del_fltr(struct iavf_adapter *adapter, bool is_raw, u32 data) +{ + struct iavf_fdir_fltr *fltr = NULL; + int err = 0; + + spin_lock_bh(&adapter->fdir_fltr_lock); + fltr = iavf_find_fdir_fltr(adapter, is_raw, data); + + if (fltr) { + if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) { + fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST; + } else if (fltr->state == IAVF_FDIR_FLTR_INACTIVE) { + list_del(&fltr->list); + iavf_dec_fdir_active_fltr(adapter, fltr); + kfree(fltr); + fltr = NULL; + } else { + err = -EBUSY; + } + } else if (adapter->fdir_active_fltr) { + err = -EINVAL; + } + + if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST) + iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_FDIR_FILTER); + + spin_unlock_bh(&adapter->fdir_fltr_lock); + return err; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h index d31bd923ba8c..e84a5351162f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h @@ -117,17 +117,26 @@ struct iavf_fdir_fltr { u32 flow_id; + u32 cls_u32_handle; /* for FDIR added via tc u32 */ u32 loc; /* Rule location inside the flow table */ u32 q_index; struct virtchnl_fdir_add vc_add_msg; }; +static inline bool iavf_is_raw_fdir(struct iavf_fdir_fltr *fltr) +{ + return !fltr->vc_add_msg.rule_cfg.proto_hdrs.count; +} + int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); -void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); -struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc); +int iavf_fdir_add_fltr(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr); +int iavf_fdir_del_fltr(struct iavf_adapter *adapter, bool is_raw, u32 data); +struct iavf_fdir_fltr *iavf_find_fdir_fltr(struct iavf_adapter *adapter, + bool is_raw, u32 data); #endif /* _IAVF_FDIR_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ff11bafb3b4f..f782402cd789 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4013,7 +4013,7 @@ static int iavf_delete_clsflower(struct iavf_adapter *adapter, /** * iavf_setup_tc_cls_flower - flower classifier offloads - * @adapter: board private structure + * @adapter: pointer to iavf adapter structure * @cls_flower: pointer to flow_cls_offload struct with flow info */ static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter, @@ -4032,6 +4032,154 @@ static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter, } /** + * iavf_add_cls_u32 - Add U32 classifier offloads + * @adapter: pointer to iavf adapter structure + * @cls_u32: pointer to tc_cls_u32_offload struct with flow info + * + * Return: 0 on success or negative errno on failure. 
+ */ +static int iavf_add_cls_u32(struct iavf_adapter *adapter, + struct tc_cls_u32_offload *cls_u32) +{ + struct netlink_ext_ack *extack = cls_u32->common.extack; + struct virtchnl_fdir_rule *rule_cfg; + struct virtchnl_filter_action *vact; + struct virtchnl_proto_hdrs *hdrs; + struct ethhdr *spec_h, *mask_h; + const struct tc_action *act; + struct iavf_fdir_fltr *fltr; + struct tcf_exts *exts; + unsigned int q_index; + int i, status = 0; + int off_base = 0; + + if (cls_u32->knode.link_handle) { + NL_SET_ERR_MSG_MOD(extack, "Linking not supported"); + return -EOPNOTSUPP; + } + + fltr = kzalloc(sizeof(*fltr), GFP_KERNEL); + if (!fltr) + return -ENOMEM; + + rule_cfg = &fltr->vc_add_msg.rule_cfg; + hdrs = &rule_cfg->proto_hdrs; + hdrs->count = 0; + + /* The parser lib at the PF expects the packet starting with MAC hdr */ + switch (ntohs(cls_u32->common.protocol)) { + case ETH_P_802_3: + break; + case ETH_P_IP: + spec_h = (struct ethhdr *)hdrs->raw.spec; + mask_h = (struct ethhdr *)hdrs->raw.mask; + spec_h->h_proto = htons(ETH_P_IP); + mask_h->h_proto = htons(0xFFFF); + off_base += ETH_HLEN; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Only 802_3 and ip filter protocols are supported"); + status = -EOPNOTSUPP; + goto free_alloc; + } + + for (i = 0; i < cls_u32->knode.sel->nkeys; i++) { + __be32 val, mask; + int off; + + off = off_base + cls_u32->knode.sel->keys[i].off; + val = cls_u32->knode.sel->keys[i].val; + mask = cls_u32->knode.sel->keys[i].mask; + + if (off >= sizeof(hdrs->raw.spec)) { + NL_SET_ERR_MSG_MOD(extack, "Input exceeds maximum allowed."); + status = -EINVAL; + goto free_alloc; + } + + memcpy(&hdrs->raw.spec[off], &val, sizeof(val)); + memcpy(&hdrs->raw.mask[off], &mask, sizeof(mask)); + hdrs->raw.pkt_len = off + sizeof(val); + } + + /* Only one action is allowed */ + rule_cfg->action_set.count = 1; + vact = &rule_cfg->action_set.actions[0]; + exts = cls_u32->knode.exts; + + tcf_exts_for_each_action(i, act, exts) { + /* FDIR queue */ + if (is_tcf_skbedit_rx_queue_mapping(act)) { + q_index = tcf_skbedit_rx_queue_mapping(act); + if (q_index >= adapter->num_active_queues) { + status = -EINVAL; + goto free_alloc; + } + + vact->type = VIRTCHNL_ACTION_QUEUE; + vact->act_conf.queue.index = q_index; + break; + } + + /* Drop */ + if (is_tcf_gact_shot(act)) { + vact->type = VIRTCHNL_ACTION_DROP; + break; + } + + /* Unsupported action */ + NL_SET_ERR_MSG_MOD(extack, "Unsupported action."); + status = -EOPNOTSUPP; + goto free_alloc; + } + + fltr->vc_add_msg.vsi_id = adapter->vsi.id; + fltr->cls_u32_handle = cls_u32->knode.handle; + return iavf_fdir_add_fltr(adapter, fltr); + +free_alloc: + kfree(fltr); + return status; +} + +/** + * iavf_del_cls_u32 - Delete U32 classifier offloads + * @adapter: pointer to iavf adapter structure + * @cls_u32: pointer to tc_cls_u32_offload struct with flow info + * + * Return: 0 on success or negative errno on failure. + */ +static int iavf_del_cls_u32(struct iavf_adapter *adapter, + struct tc_cls_u32_offload *cls_u32) +{ + return iavf_fdir_del_fltr(adapter, true, cls_u32->knode.handle); +} + +/** + * iavf_setup_tc_cls_u32 - U32 filter offloads + * @adapter: pointer to iavf adapter structure + * @cls_u32: pointer to tc_cls_u32_offload struct with flow info + * + * Return: 0 on success or negative errno on failure. 
+ */ +static int iavf_setup_tc_cls_u32(struct iavf_adapter *adapter, + struct tc_cls_u32_offload *cls_u32) +{ + if (!TC_U32_SUPPORT(adapter) || !FDIR_FLTR_SUPPORT(adapter)) + return -EOPNOTSUPP; + + switch (cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return iavf_add_cls_u32(adapter, cls_u32); + case TC_CLSU32_DELETE_KNODE: + return iavf_del_cls_u32(adapter, cls_u32); + default: + return -EOPNOTSUPP; + } +} + +/** * iavf_setup_tc_block_cb - block callback for tc * @type: type of offload * @type_data: offload data @@ -4050,6 +4198,8 @@ static int iavf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: return iavf_setup_tc_cls_flower(cb_priv, type_data); + case TC_SETUP_CLSU32: + return iavf_setup_tc_cls_u32(cb_priv, type_data); default: return -EOPNOTSUPP; } @@ -4332,8 +4482,8 @@ static void iavf_disable_fdir(struct iavf_adapter *adapter) fdir->state == IAVF_FDIR_FLTR_INACTIVE) { /* Delete filters not registered in PF */ list_del(&fdir->list); + iavf_dec_fdir_active_fltr(adapter, fdir); kfree(fdir); - adapter->fdir_active_fltr--; } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING || fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST || fdir->state == IAVF_FDIR_FLTR_ACTIVE) { @@ -4843,9 +4993,11 @@ int iavf_process_config(struct iavf_adapter *adapter) /* get HW VLAN features that can be toggled */ hw_vlan_features = iavf_get_netdev_vlan_hw_features(adapter); - /* Enable cloud filter if ADQ is supported */ - if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) + /* Enable HW TC offload if ADQ or tc U32 is supported */ + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ || + TC_U32_SUPPORT(adapter)) hw_features |= NETIF_F_HW_TC; + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_USO) hw_features |= NETIF_F_GSO_UDP_L4; diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 1e543f6a7c30..7e810b65380c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -142,6 +142,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | + VIRTCHNL_VF_OFFLOAD_TC_U32 | VIRTCHNL_VF_OFFLOAD_VLAN_V2 | VIRTCHNL_VF_OFFLOAD_CRC | VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | @@ -1961,8 +1962,8 @@ static void iavf_activate_fdir_filters(struct iavf_adapter *adapter) * list on PF is already cleared after a reset */ list_del(&f->list); + iavf_dec_fdir_active_fltr(adapter, f); kfree(f); - adapter->fdir_active_fltr--; } } spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -2135,8 +2136,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, dev_err(&adapter->pdev->dev, "%s\n", msg); list_del(&fdir->list); + iavf_dec_fdir_active_fltr(adapter, fdir); kfree(fdir); - adapter->fdir_active_fltr--; } } spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -2451,8 +2452,12 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list) { if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) { if (add_fltr->status == VIRTCHNL_FDIR_SUCCESS) { - dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is added\n", - fdir->loc); + if (!iavf_is_raw_fdir(fdir)) + dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is added\n", + fdir->loc); + else + dev_info(&adapter->pdev->dev, "Flow Director filter (raw) for TC handle %x is added\n", + TC_U32_USERHTID(fdir->cls_u32_handle)); fdir->state = IAVF_FDIR_FLTR_ACTIVE; fdir->flow_id = 
add_fltr->flow_id; } else { @@ -2460,8 +2465,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, add_fltr->status); iavf_print_fdir_fltr(adapter, fdir); list_del(&fdir->list); + iavf_dec_fdir_active_fltr(adapter, fdir); kfree(fdir); - adapter->fdir_active_fltr--; } } } @@ -2479,11 +2484,15 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS || del_fltr->status == VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) { - dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n", - fdir->loc); + if (!iavf_is_raw_fdir(fdir)) + dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n", + fdir->loc); + else + dev_info(&adapter->pdev->dev, "Flow Director filter (raw) for TC handle %x is deleted\n", + TC_U32_USERHTID(fdir->cls_u32_handle)); list_del(&fdir->list); + iavf_dec_fdir_active_fltr(adapter, fdir); kfree(fdir); - adapter->fdir_active_fltr--; } else { fdir->state = IAVF_FDIR_FLTR_ACTIVE; dev_info(&adapter->pdev->dev, "Failed to delete Flow Director filter with status: %d\n", diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 03500e28ac99..3307d551f431 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -28,9 +28,13 @@ ice-y := ice_main.o \ ice_vlan_mode.o \ ice_flex_pipe.o \ ice_flow.o \ + ice_parser.o \ + ice_parser_rt.o \ ice_idc.o \ devlink/devlink.o \ devlink/devlink_port.o \ + ice_sf_eth.o \ + ice_sf_vsi_vlan_ops.o \ ice_ddp.o \ ice_fw_update.o \ ice_lag.o \ diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 810a901d7afd..415445cefdb2 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -6,9 +6,11 @@ #include "ice.h" #include "ice_lib.h" #include "devlink.h" +#include "devlink_port.h" #include "ice_eswitch.h" #include "ice_fw_update.h" #include "ice_dcb_lib.h" +#include "ice_sf_eth.h" /* context for devlink info version reporting */ struct ice_info_ctx { @@ -744,6 +746,7 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node struct ice_sched_node *tc_node, struct ice_pf *pf) { struct devlink_rate *rate_node = NULL; + struct ice_dynamic_port *sf; struct ice_vf *vf; int i; @@ -755,6 +758,7 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node /* create root node */ rate_node = devl_rate_node_create(devlink, node, node->name, NULL); } else if (node->vsi_handle && + pf->vsi[node->vsi_handle]->type == ICE_VSI_VF && pf->vsi[node->vsi_handle]->vf) { vf = pf->vsi[node->vsi_handle]->vf; if (!vf->devlink_port.devlink_rate) @@ -763,6 +767,16 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node */ devl_rate_leaf_create(&vf->devlink_port, node, node->parent->rate_node); + } else if (node->vsi_handle && + pf->vsi[node->vsi_handle]->type == ICE_VSI_SF && + pf->vsi[node->vsi_handle]->sf) { + sf = pf->vsi[node->vsi_handle]->sf; + if (!sf->devlink_port.devlink_rate) + /* leaf nodes don't have children + * so we don't set rate_node + */ + devl_rate_leaf_create(&sf->devlink_port, node, + node->parent->rate_node); } else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF && node->parent->rate_node) { rate_node = devl_rate_node_create(devlink, node, node->name, @@ -1277,8 +1291,12 @@ static const struct devlink_ops ice_devlink_ops = { .rate_leaf_parent_set = ice_devlink_set_parent,
.rate_node_parent_set = ice_devlink_set_parent, + + .port_new = ice_devlink_port_new, }; +static const struct devlink_ops ice_sf_devlink_ops; + static int ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx) @@ -1562,6 +1580,34 @@ struct ice_pf *ice_allocate_pf(struct device *dev) } /** + * ice_allocate_sf - Allocate devlink and return SF structure pointer + * @dev: the device to allocate for + * @pf: pointer to the PF structure + * + * Allocate a devlink instance for SF. + * + * Return: ice_sf_priv pointer to allocated memory or ERR_PTR in case of error + */ +struct ice_sf_priv *ice_allocate_sf(struct device *dev, struct ice_pf *pf) +{ + struct devlink *devlink; + int err; + + devlink = devlink_alloc(&ice_sf_devlink_ops, sizeof(struct ice_sf_priv), + dev); + if (!devlink) + return ERR_PTR(-ENOMEM); + + err = devl_nested_devlink_set(priv_to_devlink(pf), devlink); + if (err) { + devlink_free(devlink); + return ERR_PTR(err); + } + + return devlink_priv(devlink); +} + +/** * ice_devlink_register - Register devlink interface for this PF * @pf: the PF to register the devlink for. * diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.h b/drivers/net/ethernet/intel/ice/devlink/devlink.h index d291c0e2e17b..1af3b0763fbb 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.h +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.h @@ -5,6 +5,7 @@ #define _ICE_DEVLINK_H_ struct ice_pf *ice_allocate_pf(struct device *dev); +struct ice_sf_priv *ice_allocate_sf(struct device *dev, struct ice_pf *pf); void ice_devlink_register(struct ice_pf *pf); void ice_devlink_unregister(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c index 00fed5a61d62..928c8bdb6649 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c @@ -5,6 +5,9 @@ #include "ice.h" #include "devlink.h" +#include "devlink_port.h" +#include "ice_lib.h" +#include "ice_fltr.h" static int ice_active_port_option = -1; @@ -337,7 +340,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) return -EIO; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.bus.func; + attrs.phys.port_number = pf->hw.pf_id; /* As FW supports only port split options for whole device, * set port split options only for first PF. @@ -455,7 +458,7 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) return -EINVAL; attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; - attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.pf = pf->hw.pf_id; attrs.pci_vf.vf = vf->vf_id; ice_devlink_set_switch_id(pf, &attrs.switch_id); @@ -485,3 +488,506 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf) devl_rate_leaf_destroy(&vf->devlink_port); devl_port_unregister(&vf->devlink_port); } + +/** + * ice_devlink_create_sf_dev_port - Register virtual port for a subfunction + * @sf_dev: the subfunction device to create a devlink port for + * + * Register virtual flavour devlink port for the subfunction auxiliary device + * created after activating a dynamically added devlink port. + * + * Return: zero on success or an error code on failure. 
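+ *
+ * A hedged usage sketch (auxiliary device name and port index are
+ * illustrative): once the subfunction is activated, this virtual port
+ * shows up under the SF's own nested devlink instance rather than under
+ * the PF, e.g.:
+ *
+ *	devlink port show auxiliary/ice.sf.1/1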
+ */ +int ice_devlink_create_sf_dev_port(struct ice_sf_dev *sf_dev) +{ + struct devlink_port_attrs attrs = {}; + struct ice_dynamic_port *dyn_port; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + + dyn_port = sf_dev->dyn_port; + vsi = dyn_port->vsi; + + devlink_port = &sf_dev->priv->devlink_port; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(sf_dev->priv); + + return devl_port_register(devlink, devlink_port, vsi->idx); +} + +/** + * ice_devlink_destroy_sf_dev_port - Destroy virtual port for a subfunction + * @sf_dev: the subfunction device to destroy the devlink port for + * + * Unregisters the virtual port associated with this subfunction. + */ +void ice_devlink_destroy_sf_dev_port(struct ice_sf_dev *sf_dev) +{ + devl_port_unregister(&sf_dev->priv->devlink_port); +} + +/** + * ice_activate_dynamic_port - Activate a dynamic port + * @dyn_port: dynamic port instance to activate + * @extack: extack for reporting error messages + * + * Activate the dynamic port based on its flavour. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_activate_dynamic_port(struct ice_dynamic_port *dyn_port, + struct netlink_ext_ack *extack) +{ + int err; + + if (dyn_port->active) + return 0; + + err = ice_sf_eth_activate(dyn_port, extack); + if (err) + return err; + + dyn_port->active = true; + + return 0; +} + +/** + * ice_deactivate_dynamic_port - Deactivate a dynamic port + * @dyn_port: dynamic port instance to deactivate + * + * Undo activation of a dynamic port. + */ +static void ice_deactivate_dynamic_port(struct ice_dynamic_port *dyn_port) +{ + if (!dyn_port->active) + return; + + ice_sf_eth_deactivate(dyn_port); + dyn_port->active = false; +} + +/** + * ice_dealloc_dynamic_port - Deallocate and remove a dynamic port + * @dyn_port: dynamic port instance to deallocate + * + * Free resources associated with a dynamically added devlink port. Will + * deactivate the port if it is currently active. + */ +static void ice_dealloc_dynamic_port(struct ice_dynamic_port *dyn_port) +{ + struct devlink_port *devlink_port = &dyn_port->devlink_port; + struct ice_pf *pf = dyn_port->pf; + + ice_deactivate_dynamic_port(dyn_port); + + xa_erase(&pf->sf_nums, devlink_port->attrs.pci_sf.sf); + ice_eswitch_detach_sf(pf, dyn_port); + xa_erase(&pf->dyn_ports, dyn_port->vsi->idx); + ice_vsi_free(dyn_port->vsi); + kfree(dyn_port); +} + +/** + * ice_dealloc_all_dynamic_ports - Deallocate all dynamic devlink ports + * @pf: pointer to the pf structure + */ +void ice_dealloc_all_dynamic_ports(struct ice_pf *pf) +{ + struct ice_dynamic_port *dyn_port; + unsigned long index; + + xa_for_each(&pf->dyn_ports, index, dyn_port) + ice_dealloc_dynamic_port(dyn_port); +} + +/** + * ice_devlink_port_new_check_attr - Check that new port attributes are valid + * @pf: pointer to the PF structure + * @new_attr: the attributes for the new port + * @extack: extack for reporting error messages + * + * Check that the attributes for the new port are valid before continuing to + * allocate the devlink port. + * + * Return: zero on success or an error code on failure.
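+ *
+ * A hedged sketch of the userspace view (PCI address and numbers are
+ * illustrative): only the pcisf flavour is accepted and pfnum must name
+ * this PF, so on PF 0 the first request passes these checks while the
+ * second is rejected with -EINVAL:
+ *
+ *	devlink port add pci/0000:31:00.0 flavour pcisf pfnum 0 sfnum 104
+ *	devlink port add pci/0000:31:00.0 flavour pcisf pfnum 3 sfnum 104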
+ */ +static int +ice_devlink_port_new_check_attr(struct ice_pf *pf, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) { + NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported"); + return -EOPNOTSUPP; + } + + if (new_attr->controller_valid) { + NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported"); + return -EOPNOTSUPP; + } + + if (new_attr->port_index_valid) { + NL_SET_ERR_MSG_MOD(extack, "Driver does not support user defined port index assignment"); + return -EOPNOTSUPP; + } + + if (new_attr->pfnum != pf->hw.pf_id) { + NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied"); + return -EINVAL; + } + + if (!pci_msix_can_alloc_dyn(pf->pdev)) { + NL_SET_ERR_MSG_MOD(extack, "Dynamic MSI-X interrupt allocation is not supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * ice_devlink_port_del - devlink handler for port delete + * @devlink: pointer to devlink + * @port: devlink port to be deleted + * @extack: pointer to extack + * + * Deletes the devlink port and deallocates all resources associated with + * the created subfunction. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_del(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + ice_dealloc_dynamic_port(dyn_port); + + return 0; +} + +/** + * ice_devlink_port_fn_hw_addr_set - devlink handler for mac address set + * @port: pointer to devlink port + * @hw_addr: hw address to set + * @hw_addr_len: hw address length + * @extack: extack for reporting error messages + * + * Sets mac address for the port, verifies arguments and copies address + * to the subfunction structure. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_fn_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, + int hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + if (dyn_port->attached) { + NL_SET_ERR_MSG_MOD(extack, + "Ethernet address can be changed only in detached state"); + return -EBUSY; + } + + if (hw_addr_len != ETH_ALEN || !is_valid_ether_addr(hw_addr)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid ethernet address"); + return -EADDRNOTAVAIL; + } + + ether_addr_copy(dyn_port->hw_addr, hw_addr); + + return 0; +} + +/** + * ice_devlink_port_fn_hw_addr_get - devlink handler for mac address get + * @port: pointer to devlink port + * @hw_addr: buffer to receive the hw address + * @hw_addr_len: hw address length + * @extack: extack for reporting error messages + * + * Returns mac address for the port. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_fn_hw_addr_get(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + ether_addr_copy(hw_addr, dyn_port->hw_addr); + *hw_addr_len = ETH_ALEN; + + return 0; +} + +/** + * ice_devlink_port_fn_state_set - devlink handler for port state set + * @port: pointer to devlink port + * @state: state to set + * @extack: extack for reporting error messages + * + * Activates or deactivates the port. + * + * Return: zero on success or an error code on failure.
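+ *
+ * A hedged usage sketch (port handle is illustrative): activating spawns
+ * the SF auxiliary device and its netdev, deactivating removes them:
+ *
+ *	devlink port function set pci/0000:31:00.0/1 state active
+ *	devlink port function set pci/0000:31:00.0/1 state inactive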
+ */ +static int +ice_devlink_port_fn_state_set(struct devlink_port *port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + switch (state) { + case DEVLINK_PORT_FN_STATE_ACTIVE: + return ice_activate_dynamic_port(dyn_port, extack); + + case DEVLINK_PORT_FN_STATE_INACTIVE: + ice_deactivate_dynamic_port(dyn_port); + break; + } + + return 0; +} + +/** + * ice_devlink_port_fn_state_get - devlink handler for port state get + * @port: pointer to devlink port + * @state: admin configured state of the port + * @opstate: current port operational state + * @extack: extack for reporting error messages + * + * Gets port state. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_fn_state_get(struct devlink_port *port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack) +{ + struct ice_dynamic_port *dyn_port; + + dyn_port = ice_devlink_port_to_dyn(port); + + if (dyn_port->active) + *state = DEVLINK_PORT_FN_STATE_ACTIVE; + else + *state = DEVLINK_PORT_FN_STATE_INACTIVE; + + if (dyn_port->attached) + *opstate = DEVLINK_PORT_FN_OPSTATE_ATTACHED; + else + *opstate = DEVLINK_PORT_FN_OPSTATE_DETACHED; + + return 0; +} + +static const struct devlink_port_ops ice_devlink_port_sf_ops = { + .port_del = ice_devlink_port_del, + .port_fn_hw_addr_get = ice_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = ice_devlink_port_fn_hw_addr_set, + .port_fn_state_get = ice_devlink_port_fn_state_get, + .port_fn_state_set = ice_devlink_port_fn_state_set, +}; + +/** + * ice_reserve_sf_num - Reserve a subfunction number for this port + * @pf: pointer to the pf structure + * @new_attr: devlink port attributes requested + * @extack: extack for reporting error messages + * @sfnum: on success, the sf number reserved + * + * Reserve a subfunction number for this port. Only called for + * DEVLINK_PORT_FLAVOUR_PCI_SF ports. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_reserve_sf_num(struct ice_pf *pf, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, u32 *sfnum) +{ + int err; + + /* If user didn't request an explicit number, pick one */ + if (!new_attr->sfnum_valid) + return xa_alloc(&pf->sf_nums, sfnum, NULL, xa_limit_32b, + GFP_KERNEL); + + /* Otherwise, check and use the number provided */ + err = xa_insert(&pf->sf_nums, new_attr->sfnum, NULL, GFP_KERNEL); + if (err) { + if (err == -EBUSY) + NL_SET_ERR_MSG_MOD(extack, "Subfunction with given sfnum already exists"); + return err; + } + + *sfnum = new_attr->sfnum; + + return 0; +} + +/** + * ice_devlink_create_sf_port - Register PCI subfunction devlink port + * @dyn_port: the dynamic port instance structure for this subfunction + * + * Register PCI subfunction flavour devlink port for a dynamically added + * subfunction port. + * + * Return: zero on success or an error code on failure. 
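+ *
+ * A hedged sketch of the resulting entry (handle, netdev name and
+ * numbers are illustrative):
+ *
+ *	devlink port show pci/0000:31:00.0/1
+ *	pci/0000:31:00.0/1: type eth netdev eth1 flavour pcisf
+ *	    controller 0 pfnum 0 sfnum 104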
+ */ +int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct ice_pf *pf; + + vsi = dyn_port->vsi; + pf = dyn_port->pf; + + devlink_port = &dyn_port->devlink_port; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF; + attrs.pci_sf.pf = pf->hw.pf_id; + attrs.pci_sf.sf = dyn_port->sfnum; + + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(pf); + + return devl_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_port_sf_ops); +} + +/** + * ice_devlink_destroy_sf_port - Destroy the devlink_port for this SF + * @dyn_port: the dynamic port instance structure for this subfunction + * + * Unregisters the devlink_port structure associated with this SF. + */ +void ice_devlink_destroy_sf_port(struct ice_dynamic_port *dyn_port) +{ + devl_rate_leaf_destroy(&dyn_port->devlink_port); + devl_port_unregister(&dyn_port->devlink_port); +} + +/** + * ice_alloc_dynamic_port - Allocate new dynamic port + * @pf: pointer to the pf structure + * @new_attr: devlink port attributes requested + * @extack: extack for reporting error messages + * @devlink_port: on success, set to the newly created devlink port + * + * Allocate a new dynamic port instance and prepare it for configuration + * with devlink. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_alloc_dynamic_port(struct ice_pf *pf, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **devlink_port) +{ + struct ice_dynamic_port *dyn_port; + struct ice_vsi *vsi; + u32 sfnum; + int err; + + err = ice_reserve_sf_num(pf, new_attr, extack, &sfnum); + if (err) + return err; + + dyn_port = kzalloc(sizeof(*dyn_port), GFP_KERNEL); + if (!dyn_port) { + err = -ENOMEM; + goto unroll_reserve_sf_num; + } + + vsi = ice_vsi_alloc(pf); + if (!vsi) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate VSI"); + err = -ENOMEM; + goto unroll_dyn_port_alloc; + } + + dyn_port->vsi = vsi; + dyn_port->pf = pf; + dyn_port->sfnum = sfnum; + eth_random_addr(dyn_port->hw_addr); + + err = xa_insert(&pf->dyn_ports, vsi->idx, dyn_port, GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Port index reservation failed"); + goto unroll_vsi_alloc; + } + + err = ice_eswitch_attach_sf(pf, dyn_port); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to attach SF to eswitch"); + goto unroll_xa_insert; + } + + *devlink_port = &dyn_port->devlink_port; + + return 0; + +unroll_xa_insert: + xa_erase(&pf->dyn_ports, vsi->idx); +unroll_vsi_alloc: + ice_vsi_free(vsi); +unroll_dyn_port_alloc: + kfree(dyn_port); +unroll_reserve_sf_num: + xa_erase(&pf->sf_nums, sfnum); + + return err; +} + +/** + * ice_devlink_port_new - devlink handler for the new port + * @devlink: pointer to devlink + * @new_attr: pointer to the port new attributes + * @extack: extack for reporting error messages + * @devlink_port: pointer to a new port + * + * Creates a new devlink port, checks the new port attributes, rejects + * any unsupported parameters and allocates a new subfunction for that port. + * + * Return: zero on success or an error code on failure.
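+ *
+ * A hedged end-to-end usage sketch (PCI address, sfnum, MAC address and
+ * resulting port index are illustrative):
+ *
+ *	devlink port add pci/0000:31:00.0 flavour pcisf pfnum 0 sfnum 104
+ *	devlink port function set pci/0000:31:00.0/1 hw_addr 02:11:22:33:44:55
+ *	devlink port function set pci/0000:31:00.0/1 state active
+ *	devlink port del pci/0000:31:00.0/1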
+ */ +int +ice_devlink_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **devlink_port) +{ + struct ice_pf *pf = devlink_priv(devlink); + int err; + + err = ice_devlink_port_new_check_attr(pf, new_attr, extack); + if (err) + return err; + + return ice_alloc_dynamic_port(pf, new_attr, extack, devlink_port); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.h b/drivers/net/ethernet/intel/ice/devlink/devlink_port.h index 9223bcdb6444..d60efc340945 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.h +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.h @@ -4,9 +4,55 @@ #ifndef _DEVLINK_PORT_H_ #define _DEVLINK_PORT_H_ +#include "../ice.h" +#include "../ice_sf_eth.h" + +/** + * struct ice_dynamic_port - Track dynamically added devlink port instance + * @hw_addr: the HW address for this port + * @active: true if the port has been activated + * @attached: true if the port is attached + * @devlink_port: the associated devlink port structure + * @pf: pointer to the PF private structure + * @vsi: the VSI associated with this port + * @repr_id: the representor ID + * @sfnum: the subfunction ID + * @sf_dev: pointer to the subfunction device + * + * An instance of a dynamically added devlink port. Each port flavour + * keeps its implementation-specific data in the trailing union. + */ +struct ice_dynamic_port { + u8 hw_addr[ETH_ALEN]; + u8 active: 1; + u8 attached: 1; + struct devlink_port devlink_port; + struct ice_pf *pf; + struct ice_vsi *vsi; + unsigned long repr_id; + u32 sfnum; + /* Flavour-specific implementation data */ + union { + struct ice_sf_dev *sf_dev; + }; +}; + +void ice_dealloc_all_dynamic_ports(struct ice_pf *pf); + int ice_devlink_create_pf_port(struct ice_pf *pf); void ice_devlink_destroy_pf_port(struct ice_pf *pf); int ice_devlink_create_vf_port(struct ice_vf *vf); void ice_devlink_destroy_vf_port(struct ice_vf *vf); +int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port); +void ice_devlink_destroy_sf_port(struct ice_dynamic_port *dyn_port); +int ice_devlink_create_sf_dev_port(struct ice_sf_dev *sf_dev); +void ice_devlink_destroy_sf_dev_port(struct ice_sf_dev *sf_dev); + +#define ice_devlink_port_to_dyn(port) \ + container_of(port, struct ice_dynamic_port, devlink_port) +int +ice_devlink_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **devlink_port); #endif /* _DEVLINK_PORT_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 99a75a59078e..d6f80da30dec 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -318,6 +318,7 @@ enum ice_vsi_state { ICE_VSI_UMAC_FLTR_CHANGED, ICE_VSI_MMAC_FLTR_CHANGED, ICE_VSI_PROMISC_CHANGED, + ICE_VSI_REBUILD_PENDING, ICE_VSI_STATE_NBITS /* must be last */ }; @@ -411,6 +412,7 @@ struct ice_vsi { struct ice_tx_ring **xdp_rings; /* XDP ring array */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + struct mutex xdp_state_lock; struct net_device **target_netdevs; @@ -449,7 +451,12 @@ struct ice_vsi { struct_group_tagged(ice_vsi_cfg_params, params, struct ice_port_info *port_info; /* back pointer to port_info */ struct ice_channel *ch; /* VSI's channel structure, may be NULL */ - struct ice_vf *vf; /* VF associated with this VSI, may be NULL */ + union { + /* VF associated with this VSI, may be NULL */ + struct ice_vf *vf; + /* SF associated with this VSI,
may be NULL */ + struct ice_dynamic_port *sf; + }; u32 flags; /* VSI flags used for rebuild and configuration */ enum ice_vsi_type type; /* the type of the VSI */ ); @@ -650,6 +657,9 @@ struct ice_pf { struct ice_eswitch eswitch; struct ice_esw_br_port *br_port; + struct xarray dyn_ports; + struct xarray sf_nums; + #define ICE_INVALID_AGG_NODE_ID 0 #define ICE_PF_AGG_NODE_ID_START 1 #define ICE_MAX_PF_AGG_NODES 32 @@ -765,18 +775,17 @@ static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi, } /** - * ice_xsk_pool - get XSK buffer pool bound to a ring + * ice_rx_xsk_pool - assign XSK buff pool to Rx ring * @ring: Rx ring to use * - * Returns a pointer to xsk_buff_pool structure if there is a buffer pool - * present, NULL otherwise. + * Sets XSK buff pool pointer on Rx ring. */ -static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) +static inline void ice_rx_xsk_pool(struct ice_rx_ring *ring) { struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - return ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** @@ -801,7 +810,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) if (!ring) return; - ring->xsk_pool = ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** @@ -917,6 +926,7 @@ int ice_vsi_open(struct ice_vsi *vsi); void ice_set_ethtool_ops(struct net_device *netdev); void ice_set_ethtool_repr_ops(struct net_device *netdev); void ice_set_ethtool_safe_mode_ops(struct net_device *netdev); +void ice_set_ethtool_sf_ops(struct net_device *netdev); u16 ice_get_avail_txq_count(struct ice_pf *pf); u16 ice_get_avail_rxq_count(struct ice_pf *pf); int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked); @@ -1002,6 +1012,14 @@ void ice_unload(struct ice_pf *pf); void ice_adv_lnk_speed_maps_init(void); int ice_init_dev(struct ice_pf *pf); void ice_deinit_dev(struct ice_pf *pf); +int ice_change_mtu(struct net_device *netdev, int new_mtu); +void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue); +int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp); +void ice_set_netdev_features(struct net_device *netdev); +int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); +int ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); +void ice_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats); /** * ice_set_rdma_cap - enable RDMA support diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 66f02988d549..0be1a98d7cc1 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -2632,12 +2632,16 @@ struct ice_aq_desc { /* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ #define ICE_AQ_LG_BUF 512 +#define ICE_AQ_FLAG_DD_S 0 +#define ICE_AQ_FLAG_CMP_S 1 #define ICE_AQ_FLAG_ERR_S 2 #define ICE_AQ_FLAG_LB_S 9 #define ICE_AQ_FLAG_RD_S 10 #define ICE_AQ_FLAG_BUF_S 12 #define ICE_AQ_FLAG_SI_S 13 +#define ICE_AQ_FLAG_DD BIT(ICE_AQ_FLAG_DD_S) /* 0x1 */ +#define ICE_AQ_FLAG_CMP BIT(ICE_AQ_FLAG_CMP_S) /* 0x2 */ #define ICE_AQ_FLAG_ERR BIT(ICE_AQ_FLAG_ERR_S) /* 0x4 */ #define ICE_AQ_FLAG_LB BIT(ICE_AQ_FLAG_LB_S) /* 0x200 */ #define ICE_AQ_FLAG_RD BIT(ICE_AQ_FLAG_RD_S) /* 0x400 */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5d396c1a7731..4a9a6899fc45 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -190,16 +190,11 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) } q_vector = vsi->q_vectors[v_idx]; - ice_for_each_tx_ring(tx_ring, q_vector->tx) { - ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX, - NULL); + ice_for_each_tx_ring(tx_ring, vsi->q_vectors[v_idx]->tx) tx_ring->q_vector = NULL; - } - ice_for_each_rx_ring(rx_ring, q_vector->rx) { - ice_queue_set_napi(vsi, rx_ring->q_index, NETDEV_QUEUE_TYPE_RX, - NULL); + + ice_for_each_rx_ring(rx_ring, vsi->q_vectors[v_idx]->rx) rx_ring->q_vector = NULL; - } /* only VSI with an associated netdev is set up with NAPI */ if (vsi->netdev) @@ -330,6 +325,9 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id; tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; break; + case ICE_VSI_SF: + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; + break; default: return; } @@ -513,6 +511,25 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) } /** + * ice_get_frame_sz - calculate xdp_buff::frame_sz + * @rx_ring: the ring being configured + * + * Return frame size based on underlying PAGE_SIZE + */ +static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) +{ + unsigned int frame_sz; + +#if (PAGE_SIZE >= 8192) + frame_sz = rx_ring->rx_buf_len; +#else + frame_sz = ice_rx_pg_size(rx_ring) / 2; +#endif + + return frame_sz; +} + +/** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured * @@ -526,7 +543,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) ring->rx_buf_len = ring->vsi->rx_buf_len; - if (ring->vsi->type == ICE_VSI_PF) { + if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, @@ -536,7 +553,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return err; } - ring->xsk_pool = ice_xsk_pool(ring); + ice_rx_xsk_pool(ring); if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); @@ -576,7 +593,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } } - xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); @@ -597,7 +614,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return 0; } - ok = ice_alloc_rx_bufs_zc(ring, num_bufs); + ok = ice_alloc_rx_bufs_zc(ring, ring->xsk_pool, num_bufs); if (!ok) { u16 pf_q = ring->vsi->rxq_map[ring->q_index]; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 66f29bac783a..27208a60cece 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -10,6 +10,7 @@ #include "ice_type.h" #include "ice_nvm.h" #include "ice_flex_pipe.h" +#include "ice_parser.h" #include <linux/avf/virtchnl.h> #include "ice_switch.h" #include "ice_fdir.h" diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index ffaa6511c455..e3959ad442a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -99,17 +99,6 @@ ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) return -ENOMEM; cq->sq.desc_buf.size = size; - cq->sq.cmd_buf = 
devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries, - sizeof(struct ice_sq_cd), GFP_KERNEL); - if (!cq->sq.cmd_buf) { - dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size, - cq->sq.desc_buf.va, cq->sq.desc_buf.pa); - cq->sq.desc_buf.va = NULL; - cq->sq.desc_buf.pa = 0; - cq->sq.desc_buf.size = 0; - return -ENOMEM; - } - return 0; } @@ -188,7 +177,7 @@ ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) if (cq->rq_buf_size > ICE_AQ_LG_BUF) desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB); desc->opcode = 0; - /* This is in accordance with Admin queue design, there is no + /* This is in accordance with control queue design, there is no * register for buffer size configuration */ desc->datalen = cpu_to_le16(bi->size); @@ -338,8 +327,6 @@ do { \ (qi)->ring.r.ring##_bi[i].size = 0;\ } \ } \ - /* free the buffer info list */ \ - devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ /* free DMA head */ \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ } while (0) @@ -405,11 +392,11 @@ init_ctrlq_exit: } /** - * ice_init_rq - initialize ARQ + * ice_init_rq - initialize receive side of a control queue * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * - * The main initialization routine for the Admin Receive (Event) Queue. + * The main initialization routine for Receive side of a control queue. * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure: * - cq->num_rq_entries @@ -465,7 +452,7 @@ init_ctrlq_exit: } /** - * ice_shutdown_sq - shutdown the Control ATQ + * ice_shutdown_sq - shutdown the transmit side of a control queue * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * @@ -482,7 +469,7 @@ static int ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) goto shutdown_sq_out; } - /* Stop firmware AdminQ processing */ + /* Stop processing of the control queue */ wr32(hw, cq->sq.head, 0); wr32(hw, cq->sq.tail, 0); wr32(hw, cq->sq.len, 0); @@ -501,7 +488,7 @@ shutdown_sq_out: } /** - * ice_aq_ver_check - Check the reported AQ API version. + * ice_aq_ver_check - Check the reported AQ API version * @hw: pointer to the hardware structure * * Checks if the driver should load on a given AQ API version. @@ -521,14 +508,20 @@ static bool ice_aq_ver_check(struct ice_hw *hw) } else if (hw->api_maj_ver == exp_fw_api_ver_major) { if (hw->api_min_ver > (exp_fw_api_ver_minor + 2)) dev_info(ice_hw_to_dev(hw), - "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); + "The driver for the device detected a newer version (%u.%u) of the NVM image than expected (%u.%u). Please install the most recent version of the network driver.\n", + hw->api_maj_ver, hw->api_min_ver, + exp_fw_api_ver_major, exp_fw_api_ver_minor); else if ((hw->api_min_ver + 2) < exp_fw_api_ver_minor) dev_info(ice_hw_to_dev(hw), - "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); + "The driver for the device detected an older version (%u.%u) of the NVM image than expected (%u.%u). Please update the NVM image.\n", + hw->api_maj_ver, hw->api_min_ver, + exp_fw_api_ver_major, exp_fw_api_ver_minor); } else { /* Major API version is older than expected, log a warning */ dev_info(ice_hw_to_dev(hw), - "The driver for the device detected an older version of the NVM image than expected. 
Please update the NVM image.\n"); + "The driver for the device detected an older version (%u.%u) of the NVM image than expected (%u.%u). Please update the NVM image.\n", + hw->api_maj_ver, hw->api_min_ver, + exp_fw_api_ver_major, exp_fw_api_ver_minor); } return true; } @@ -855,7 +848,7 @@ void ice_destroy_all_ctrlq(struct ice_hw *hw) } /** - * ice_clean_sq - cleans Admin send queue (ATQ) + * ice_clean_sq - cleans send side of a control queue * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * @@ -865,21 +858,17 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) { struct ice_ctl_q_ring *sq = &cq->sq; u16 ntc = sq->next_to_clean; - struct ice_sq_cd *details; struct ice_aq_desc *desc; desc = ICE_CTL_Q_DESC(*sq, ntc); - details = ICE_CTL_Q_DETAILS(*sq, ntc); while (rd32(hw, cq->sq.head) != ntc) { ice_debug(hw, ICE_DBG_AQ_MSG, "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head)); memset(desc, 0, sizeof(*desc)); - memset(details, 0, sizeof(*details)); ntc++; if (ntc == sq->count) ntc = 0; desc = ICE_CTL_Q_DESC(*sq, ntc); - details = ICE_CTL_Q_DETAILS(*sq, ntc); } sq->next_to_clean = ntc; @@ -888,18 +877,43 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) } /** + * ice_ctl_q_str - Convert control queue type to string + * @qtype: the control queue type + * + * Return: A string name for the given control queue type. + */ +static const char *ice_ctl_q_str(enum ice_ctl_q qtype) +{ + switch (qtype) { + case ICE_CTL_Q_UNKNOWN: + return "Unknown CQ"; + case ICE_CTL_Q_ADMIN: + return "AQ"; + case ICE_CTL_Q_MAILBOX: + return "MBXQ"; + case ICE_CTL_Q_SB: + return "SBQ"; + default: + return "Unrecognized CQ"; + } +} + +/** * ice_debug_cq * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue * @desc: pointer to control queue descriptor * @buf: pointer to command buffer * @buf_len: max length of buf + * @response: true if this is the writeback response * * Dumps debug log about control command with descriptor contents. */ -static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len) +static void ice_debug_cq(struct ice_hw *hw, struct ice_ctl_q_info *cq, + void *desc, void *buf, u16 buf_len, bool response) { struct ice_aq_desc *cq_desc = desc; - u16 len; + u16 datalen, flags; if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) && !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask)) @@ -908,48 +922,63 @@ static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len) if (!desc) return; - len = le16_to_cpu(cq_desc->datalen); + datalen = le16_to_cpu(cq_desc->datalen); + flags = le16_to_cpu(cq_desc->flags); - ice_debug(hw, ICE_DBG_AQ_DESC, "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", - le16_to_cpu(cq_desc->opcode), - le16_to_cpu(cq_desc->flags), - le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval)); - ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n", + ice_debug(hw, ICE_DBG_AQ_DESC, "%s %s: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n\tcookie (h,l) 0x%08X 0x%08X\n\tparam (0,1) 0x%08X 0x%08X\n\taddr (h,l) 0x%08X 0x%08X\n", + ice_ctl_q_str(cq->qtype), response ? 
"Response" : "Command", + le16_to_cpu(cq_desc->opcode), flags, datalen, + le16_to_cpu(cq_desc->retval), le32_to_cpu(cq_desc->cookie_high), - le32_to_cpu(cq_desc->cookie_low)); - ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->cookie_low), le32_to_cpu(cq_desc->params.generic.param0), - le32_to_cpu(cq_desc->params.generic.param1)); - ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.param1), le32_to_cpu(cq_desc->params.generic.addr_high), le32_to_cpu(cq_desc->params.generic.addr_low)); - if (buf && cq_desc->datalen != 0) { - ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n"); - if (buf_len < len) - len = buf_len; - - ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, buf, len); + /* Dump buffer iff 1) one exists and 2) is either a response indicated + * by the DD and/or CMP flag set or a command with the RD flag set. + */ + if (buf && cq_desc->datalen && + (flags & (ICE_AQ_FLAG_DD | ICE_AQ_FLAG_CMP | ICE_AQ_FLAG_RD))) { + char prefix[] = KBUILD_MODNAME " 0x12341234 0x12341234 "; + + sprintf(prefix, KBUILD_MODNAME " 0x%08X 0x%08X ", + le32_to_cpu(cq_desc->params.generic.addr_high), + le32_to_cpu(cq_desc->params.generic.addr_low)); + ice_debug_array_w_prefix(hw, ICE_DBG_AQ_DESC_BUF, prefix, + buf, + min_t(u16, buf_len, datalen)); } } /** - * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ) + * ice_sq_done - poll until the last send on a control queue has completed * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue * - * Returns true if the firmware has processed all descriptors on the - * admin send queue. Returns false if there are still requests pending. + * Use read_poll_timeout to poll the control queue head, checking until it + * matches next_to_use. According to the control queue designers, this has + * better timing reliability than the DD bit. + * + * Return: true if all the descriptors on the send side of a control queue + * are finished processing, false otherwise. */ static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq) { - /* AQ designers suggest use of head for better - * timing reliability than DD bit + u32 head; + + /* Wait a short time before the initial check, to allow hardware time + * for completion. */ - return rd32(hw, cq->sq.head) == cq->sq.next_to_use; + udelay(5); + + return !rd32_poll_timeout(hw, cq->sq.head, + head, head == cq->sq.next_to_use, + 20, ICE_CTL_Q_SQ_CMD_TIMEOUT); } /** - * ice_sq_send_cmd - send command to Control Queue (ATQ) + * ice_sq_send_cmd - send command to a control queue * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue * @desc: prefilled descriptor describing the command @@ -957,8 +986,9 @@ static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq) * @buf_size: size of buffer for indirect commands (or 0 for direct commands) * @cd: pointer to command details structure * - * This is the main send command routine for the ATQ. It runs the queue, - * cleans the queue, etc. + * Main command for the transmit side of a control queue. It puts the command + * on the queue, bumps the tail, waits for processing of the command, captures + * command status and results, etc. 
*/ int ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, @@ -968,8 +998,6 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_dma_mem *dma_buf = NULL; struct ice_aq_desc *desc_on_ring; bool cmd_completed = false; - struct ice_sq_cd *details; - unsigned long timeout; int status = 0; u16 retval = 0; u32 val = 0; @@ -1013,12 +1041,6 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, goto sq_send_command_error; } - details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use); - if (cd) - *details = *cd; - else - memset(details, 0, sizeof(*details)); - /* Call clean and check queue available function to reclaim the * descriptors that were processed by FW/MBX; the function returns the * number of desc available. The clean function called here could be @@ -1055,7 +1077,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, /* Debug desc and buffer */ ice_debug(hw, ICE_DBG_AQ_DESC, "ATQ: Control Send queue desc and buffer:\n"); - ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size); + ice_debug_cq(hw, cq, (void *)desc_on_ring, buf, buf_size, false); (cq->sq.next_to_use)++; if (cq->sq.next_to_use == cq->sq.count) @@ -1063,20 +1085,9 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, wr32(hw, cq->sq.tail, cq->sq.next_to_use); ice_flush(hw); - /* Wait a short time before initial ice_sq_done() check, to allow - * hardware time for completion. + /* Wait for the command to complete. If it finishes within the + * timeout, copy the descriptor back to temp. */ - udelay(5); - - timeout = jiffies + ICE_CTL_Q_SQ_CMD_TIMEOUT; - do { - if (ice_sq_done(hw, cq)) - break; - - usleep_range(100, 150); - } while (time_before(jiffies, timeout)); - - /* if ready, copy the desc back to temp */ if (ice_sq_done(hw, cq)) { memcpy(desc, desc_on_ring, sizeof(*desc)); if (buf) { @@ -1108,12 +1119,11 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, ice_debug(hw, ICE_DBG_AQ_MSG, "ATQ: desc and buffer writeback:\n"); - ice_debug_cq(hw, (void *)desc, buf, buf_size); + ice_debug_cq(hw, cq, (void *)desc, buf, buf_size, true); /* save writeback AQ if requested */ - if (details->wb_desc) - memcpy(details->wb_desc, desc_on_ring, - sizeof(*details->wb_desc)); + if (cd && cd->wb_desc) + memcpy(cd->wb_desc, desc_on_ring, sizeof(*cd->wb_desc)); /* update the error if time out occurred */ if (!cmd_completed) { @@ -1154,9 +1164,9 @@ void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode) * @e: event info from the receive descriptor, includes any buffers * @pending: number of events that could be left to process * - * This function cleans one Admin Receive Queue element and returns - * the contents through e. It can also return how many events are - * left to process through 'pending'. + * Clean one element from the receive side of a control queue. On return 'e' + * contains contents of the message, and 'pending' contains the number of + * events left to process. 
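+ *
+ * A hedged caller-side sketch of draining the queue (the consumer
+ * function and error handling are illustrative):
+ *
+ *	struct ice_rq_event_info ev = {};
+ *	u16 pending = 0;
+ *
+ *	ev.buf_len = cq->rq_buf_size;
+ *	ev.msg_buf = kzalloc(ev.buf_len, GFP_KERNEL);
+ *	if (ev.msg_buf) {
+ *		do {
+ *			if (ice_clean_rq_elem(hw, cq, &ev, &pending))
+ *				break;
+ *			consume_event(&ev);
+ *		} while (pending);
+ *		kfree(ev.msg_buf);
+ *	}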
*/ int ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, @@ -1212,7 +1222,7 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n"); - ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size); + ice_debug_cq(hw, cq, (void *)desc, e->msg_buf, cq->rq_buf_size, true); /* Restore the original datalen and buffer address in the desc, * FW updates datalen to indicate the event message size diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 1d54b1cdb1c5..ca97b7365a1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -43,14 +43,13 @@ enum ice_ctl_q { }; /* Control Queue timeout settings - max delay 1s */ -#define ICE_CTL_Q_SQ_CMD_TIMEOUT HZ /* Wait max 1s */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT USEC_PER_SEC #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ struct ice_ctl_q_ring { void *dma_head; /* Virtual address to DMA head */ struct ice_dma_mem desc_buf; /* descriptor ring memory */ - void *cmd_buf; /* command buffer memory */ union { struct ice_dma_mem *sq_bi; @@ -80,8 +79,6 @@ struct ice_sq_cd { struct ice_aq_desc *wb_desc; }; -#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i])) - /* rq event information */ struct ice_rq_event_info { struct ice_aq_desc desc; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index a94e7072b570..a7c510832824 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -187,6 +187,7 @@ void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi) vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); break; case ICE_VSI_CHNL: + case ICE_VSI_SF: vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi)); vsi->tc_cfg.numtc = 1; break; diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index f182179529b7..953262b88a58 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -289,11 +289,11 @@ void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state, * indicates a base offset of 10, and the index for the entry is 2, then * section handler function should set the offset to 10 + 2 = 12. 
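+ *
+ * A hedged handler sketch matching that contract (the section layout
+ * struct and its fields are illustrative):
+ *
+ *	static void *entry_handler(u32 sect_type, void *section,
+ *				   u32 index, u32 *offset)
+ *	{
+ *		struct my_sect *s = section;
+ *
+ *		if (!section || index >= le16_to_cpu(s->count))
+ *			return NULL;
+ *		if (offset)
+ *			*offset = le16_to_cpu(s->base_offset) + index;
+ *		return &s->entries[index];
+ *	}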
*/ -static void *ice_pkg_enum_entry(struct ice_seg *ice_seg, - struct ice_pkg_enum *state, u32 sect_type, - u32 *offset, - void *(*handler)(u32 sect_type, void *section, - u32 index, u32 *offset)) +void *ice_pkg_enum_entry(struct ice_seg *ice_seg, + struct ice_pkg_enum *state, u32 sect_type, + u32 *offset, + void *(*handler)(u32 sect_type, void *section, + u32 index, u32 *offset)) { void *entry; diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h index 622543f08b43..97f272317475 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.h +++ b/drivers/net/ethernet/intel/ice/ice_ddp.h @@ -261,10 +261,17 @@ struct ice_meta_sect { #define ICE_SID_CDID_KEY_BUILDER_RSS 47 #define ICE_SID_CDID_REDIR_RSS 48 +#define ICE_SID_RXPARSER_CAM 50 +#define ICE_SID_RXPARSER_NOMATCH_CAM 51 +#define ICE_SID_RXPARSER_IMEM 52 #define ICE_SID_RXPARSER_MARKER_PTYPE 55 #define ICE_SID_RXPARSER_BOOST_TCAM 56 +#define ICE_SID_RXPARSER_PROTO_GRP 57 #define ICE_SID_RXPARSER_METADATA_INIT 58 #define ICE_SID_TXPARSER_BOOST_TCAM 66 +#define ICE_SID_RXPARSER_MARKER_GRP 72 +#define ICE_SID_RXPARSER_PG_SPILL 76 +#define ICE_SID_RXPARSER_NOMATCH_SPILL 78 #define ICE_SID_XLT0_PE 80 #define ICE_SID_XLT_KEY_BUILDER_PE 81 @@ -276,6 +283,7 @@ struct ice_meta_sect { #define ICE_SID_CDID_KEY_BUILDER_PE 87 #define ICE_SID_CDID_REDIR_PE 88 +#define ICE_SID_RXPARSER_FLAG_REDIR 97 /* Label Metadata section IDs */ #define ICE_SID_LBL_FIRST 0x80000010 #define ICE_SID_LBL_RXPARSER_TMEM 0x80000018 @@ -451,6 +459,11 @@ int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count); int ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count); u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld); +void * +ice_pkg_enum_entry(struct ice_seg *ice_seg, struct ice_pkg_enum *state, + u32 sect_type, u32 *offset, + void *(*handler)(u32 sect_type, void *section, + u32 index, u32 *offset)); void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state, u32 sect_type); diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index e92be6f130a3..cd95705d1e7f 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -9,6 +9,7 @@ #define ICE_CGU_STATE_ACQ_ERR_THRESHOLD 50 #define ICE_DPLL_PIN_IDX_INVALID 0xff #define ICE_DPLL_RCLK_NUM_PER_PF 1 +#define ICE_DPLL_PIN_ESYNC_PULSE_HIGH_PERCENT 25 /** * enum ice_dpll_pin_type - enumerate ice pin types: @@ -30,6 +31,10 @@ static const char * const pin_type_name[] = { [ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input", }; +static const struct dpll_pin_frequency ice_esync_range[] = { + DPLL_PIN_FREQUENCY_RANGE(0, DPLL_PIN_FREQUENCY_1_HZ), +}; + /** * ice_dpll_is_reset - check if reset is in progress * @pf: private board structure @@ -394,8 +399,8 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, switch (pin_type) { case ICE_DPLL_PIN_TYPE_INPUT: - ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL, - NULL, &pin->flags[0], + ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, &pin->status, + NULL, NULL, &pin->flags[0], &pin->freq, &pin->phase_adjust); if (ret) goto err; @@ -430,7 +435,7 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, goto err; parent &= ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL; - if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) { + if (ICE_AQC_GET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) { pin->state[pf->dplls.eec.dpll_idx] = parent == pf->dplls.eec.dpll_idx ? 
DPLL_PIN_STATE_CONNECTED : @@ -1099,6 +1104,214 @@ ice_dpll_phase_offset_get(const struct dpll_pin *pin, void *pin_priv, } /** + * ice_dpll_output_esync_set - callback for setting embedded sync + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @freq: requested embedded sync frequency + * @extack: error reporting + * + * Dpll subsystem callback. Handler for setting embedded sync frequency value + * on output pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_output_esync_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 freq, struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + u8 flags = 0; + int ret; + + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); + if (p->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_OUT_EN) + flags = ICE_AQC_SET_CGU_OUT_CFG_OUT_EN; + if (freq == DPLL_PIN_FREQUENCY_1_HZ) { + if (p->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) { + ret = 0; + } else { + flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; + ret = ice_aq_set_output_pin_cfg(&pf->hw, p->idx, flags, + 0, 0, 0); + } + } else { + if (!(p->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN)) { + ret = 0; + } else { + flags &= ~ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; + ret = ice_aq_set_output_pin_cfg(&pf->hw, p->idx, flags, + 0, 0, 0); + } + } + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +/** + * ice_dpll_output_esync_get - callback for getting embedded sync config + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @esync: on success holds embedded sync pin properties + * @extack: error reporting + * + * Dpll subsystem callback. Handler for getting embedded sync frequency value + * and capabilities on output pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_output_esync_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + struct dpll_pin_esync *esync, + struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); + if (!(p->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_ABILITY) || + p->freq != DPLL_PIN_FREQUENCY_10_MHZ) { + mutex_unlock(&pf->dplls.lock); + return -EOPNOTSUPP; + } + esync->range = ice_esync_range; + esync->range_num = ARRAY_SIZE(ice_esync_range); + if (p->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) { + esync->freq = DPLL_PIN_FREQUENCY_1_HZ; + esync->pulse = ICE_DPLL_PIN_ESYNC_PULSE_HIGH_PERCENT; + } else { + esync->freq = 0; + esync->pulse = 0; + } + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** + * ice_dpll_input_esync_set - callback for setting embedded sync + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @freq: requested embedded sync frequency + * @extack: error reporting + * + * Dpll subsystem callback. 
Handler for setting embedded sync frequency value + * on input pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_input_esync_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 freq, struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + u8 flags_en = 0; + int ret; + + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); + if (p->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN) + flags_en = ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN; + if (freq == DPLL_PIN_FREQUENCY_1_HZ) { + if (p->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN) { + ret = 0; + } else { + flags_en |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN; + ret = ice_aq_set_input_pin_cfg(&pf->hw, p->idx, 0, + flags_en, 0, 0); + } + } else { + if (!(p->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN)) { + ret = 0; + } else { + flags_en &= ~ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN; + ret = ice_aq_set_input_pin_cfg(&pf->hw, p->idx, 0, + flags_en, 0, 0); + } + } + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +/** + * ice_dpll_input_esync_get - callback for getting embedded sync config + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @esync: on success holds embedded sync pin properties + * @extack: error reporting + * + * Dpll subsystem callback. Handler for getting embedded sync frequency value + * and capabilities on input pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_input_esync_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + struct dpll_pin_esync *esync, + struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); + if (!(p->status & ICE_AQC_GET_CGU_IN_CFG_STATUS_ESYNC_CAP) || + p->freq != DPLL_PIN_FREQUENCY_10_MHZ) { + mutex_unlock(&pf->dplls.lock); + return -EOPNOTSUPP; + } + esync->range = ice_esync_range; + esync->range_num = ARRAY_SIZE(ice_esync_range); + if (p->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN) { + esync->freq = DPLL_PIN_FREQUENCY_1_HZ; + esync->pulse = ICE_DPLL_PIN_ESYNC_PULSE_HIGH_PERCENT; + } else { + esync->freq = 0; + esync->pulse = 0; + } + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** * ice_dpll_rclk_state_on_pin_set - set a state on rclk pin * @pin: pointer to a pin * @pin_priv: private data pointer passed on pin registration @@ -1222,6 +1435,8 @@ static const struct dpll_pin_ops ice_dpll_input_ops = { .phase_adjust_get = ice_dpll_pin_phase_adjust_get, .phase_adjust_set = ice_dpll_input_phase_adjust_set, .phase_offset_get = ice_dpll_phase_offset_get, + .esync_set = ice_dpll_input_esync_set, + .esync_get = ice_dpll_input_esync_get, }; static const struct dpll_pin_ops ice_dpll_output_ops = { @@ -1232,6 +1447,8 @@ static const struct dpll_pin_ops ice_dpll_output_ops = { .direction_get = ice_dpll_output_direction, .phase_adjust_get = ice_dpll_pin_phase_adjust_get, .phase_adjust_set = ice_dpll_output_phase_adjust_set, + .esync_set = ice_dpll_output_esync_set, + .esync_get = ice_dpll_output_esync_get, }; static const struct dpll_device_ops 
ice_dpll_ops = { diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h index 93172e93995b..c320f1bf7d6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.h +++ b/drivers/net/ethernet/intel/ice/ice_dpll.h @@ -31,6 +31,7 @@ struct ice_dpll_pin { struct dpll_pin_properties prop; u32 freq; s32 phase_adjust; + u8 status; }; /** ice_dpll - store info required for DPLL control diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 3cfa071e3718..c0b3e70a7ea3 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -452,11 +452,9 @@ static void ice_eswitch_start_reprs(struct ice_pf *pf) ice_eswitch_start_all_tx_queues(pf); } -int -ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) +static int +ice_eswitch_attach(struct ice_pf *pf, struct ice_repr *repr, unsigned long *id) { - struct devlink *devlink = priv_to_devlink(pf); - struct ice_repr *repr; int err; if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) @@ -470,13 +468,9 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) ice_eswitch_stop_reprs(pf); - devl_lock(devlink); - repr = ice_repr_add_vf(vf); - devl_unlock(devlink); - if (IS_ERR(repr)) { - err = PTR_ERR(repr); + err = repr->ops.add(repr); + if (err) goto err_create_repr; - } err = ice_eswitch_setup_repr(pf, repr); if (err) @@ -486,7 +480,7 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) if (err) goto err_xa_alloc; - vf->repr_id = repr->id; + *id = repr->id; ice_eswitch_start_reprs(pf); @@ -495,9 +489,7 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) err_xa_alloc: ice_eswitch_release_repr(pf, repr); err_setup_repr: - devl_lock(devlink); - ice_repr_rem_vf(repr); - devl_unlock(devlink); + repr->ops.rem(repr); err_create_repr: if (xa_empty(&pf->eswitch.reprs)) ice_eswitch_disable_switchdev(pf); @@ -506,14 +498,59 @@ err_create_repr: return err; } -void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) +/** + * ice_eswitch_attach_vf - attach VF to the eswitch + * @pf: pointer to PF structure + * @vf: pointer to VF structure to be attached + * + * While attaching, a port representor for the VF is created. + * + * Return: zero on success or an error code on failure. + */ +int ice_eswitch_attach_vf(struct ice_pf *pf, struct ice_vf *vf) { - struct ice_repr *repr = xa_load(&pf->eswitch.reprs, vf->repr_id); + struct ice_repr *repr = ice_repr_create_vf(vf); struct devlink *devlink = priv_to_devlink(pf); + int err; - if (!repr) - return; + if (IS_ERR(repr)) + return PTR_ERR(repr); + + devl_lock(devlink); + err = ice_eswitch_attach(pf, repr, &vf->repr_id); + if (err) + ice_repr_destroy(repr); + devl_unlock(devlink); + + return err; +} + +/** + * ice_eswitch_attach_sf - attach SF to the eswitch + * @pf: pointer to PF structure + * @sf: pointer to SF structure to be attached + * + * While attaching, a port representor for the SF is created. + * + * Return: zero on success or an error code on failure.
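+ *
+ * Unlike ice_eswitch_attach_vf(), no devl_lock() is taken here: this path
+ * is reached from devlink callbacks that already hold the devlink instance
+ * lock. A condensed sketch of the flow shared by both wrappers:
+ *
+ *	repr = ice_repr_create_sf(sf);
+ *	err = ice_eswitch_attach(pf, repr, &sf->repr_id);
+ *	if (err)
+ *		ice_repr_destroy(repr);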
+ */ +int ice_eswitch_attach_sf(struct ice_pf *pf, struct ice_dynamic_port *sf) +{ + struct ice_repr *repr = ice_repr_create_sf(sf); + int err; + + if (IS_ERR(repr)) + return PTR_ERR(repr); + + err = ice_eswitch_attach(pf, repr, &sf->repr_id); + if (err) + ice_repr_destroy(repr); + + return err; +} + +static void ice_eswitch_detach(struct ice_pf *pf, struct ice_repr *repr) +{ ice_eswitch_stop_reprs(pf); xa_erase(&pf->eswitch.reprs, repr->id); @@ -521,10 +558,12 @@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) ice_eswitch_disable_switchdev(pf); ice_eswitch_release_repr(pf, repr); - devl_lock(devlink); - ice_repr_rem_vf(repr); + repr->ops.rem(repr); + ice_repr_destroy(repr); if (xa_empty(&pf->eswitch.reprs)) { + struct devlink *devlink = priv_to_devlink(pf); + /* since all port representors are destroyed, there is * no point in keeping the nodes */ @@ -533,10 +572,42 @@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) } else { ice_eswitch_start_reprs(pf); } +} + +/** + * ice_eswitch_detach_vf - detach VF from an eswitch + * @pf: pointer to PF structure + * @vf: pointer to VF structure to be detached + */ +void ice_eswitch_detach_vf(struct ice_pf *pf, struct ice_vf *vf) +{ + struct ice_repr *repr = xa_load(&pf->eswitch.reprs, vf->repr_id); + struct devlink *devlink = priv_to_devlink(pf); + + if (!repr) + return; + + devl_lock(devlink); + ice_eswitch_detach(pf, repr); devl_unlock(devlink); } /** + * ice_eswitch_detach_sf - detach SF from an eswitch + * @pf: pointer to PF structure + * @sf: pointer to SF structure to be detached + */ +void ice_eswitch_detach_sf(struct ice_pf *pf, struct ice_dynamic_port *sf) +{ + struct ice_repr *repr = xa_load(&pf->eswitch.reprs, sf->repr_id); + + if (!repr) + return; + + ice_eswitch_detach(pf, repr); +} + +/** * ice_eswitch_get_target - get netdev based on src_vsi from descriptor * @rx_ring: ring used to receive the packet * @rx_desc: descriptor used to get src_vsi value diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 78fd39a6935d..20ce32dda69c 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -5,11 +5,13 @@ #define _ICE_ESWITCH_H_ #include <net/devlink.h> +#include "devlink/devlink_port.h" #ifdef CONFIG_ICE_SWITCHDEV -void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf); -int -ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf); +void ice_eswitch_detach_vf(struct ice_pf *pf, struct ice_vf *vf); +void ice_eswitch_detach_sf(struct ice_pf *pf, struct ice_dynamic_port *sf); +int ice_eswitch_attach_vf(struct ice_pf *pf, struct ice_vf *vf); +int ice_eswitch_attach_sf(struct ice_pf *pf, struct ice_dynamic_port *sf); int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode); int @@ -31,10 +33,20 @@ struct net_device *ice_eswitch_get_target(struct ice_rx_ring *rx_ring, int ice_eswitch_cfg_vsi(struct ice_vsi *vsi, const u8 *mac); void ice_eswitch_decfg_vsi(struct ice_vsi *vsi, const u8 *mac); #else /* CONFIG_ICE_SWITCHDEV */ -static inline void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) { } +static inline void +ice_eswitch_detach_vf(struct ice_pf *pf, struct ice_vf *vf) { } + +static inline void +ice_eswitch_detach_sf(struct ice_pf *pf, struct ice_dynamic_port *sf) { } + +static inline int +ice_eswitch_attach_vf(struct ice_pf *pf, struct ice_vf *vf) +{ + return -EOPNOTSUPP; +} static inline int -ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) +ice_eswitch_attach_sf(struct ice_pf 
*pf, struct ice_dynamic_port *sf) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8c990c976132..d5cc934d1359 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3792,8 +3792,6 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) return ethtool_op_get_ts_info(dev, info); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -4414,7 +4412,7 @@ ice_repr_get_drvinfo(struct net_device *netdev, { struct ice_repr *repr = ice_netdev_to_repr(netdev); - if (ice_check_vf_ready_for_cfg(repr->vf)) + if (repr->ops.ready(repr)) return; __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); @@ -4426,8 +4424,7 @@ ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data) struct ice_repr *repr = ice_netdev_to_repr(netdev); /* for port representors only ETH_SS_STATS is supported */ - if (ice_check_vf_ready_for_cfg(repr->vf) || - stringset != ETH_SS_STATS) + if (repr->ops.ready(repr) || stringset != ETH_SS_STATS) return; __ice_get_strings(netdev, stringset, data, repr->src_vsi); @@ -4440,7 +4437,7 @@ ice_repr_get_ethtool_stats(struct net_device *netdev, { struct ice_repr *repr = ice_netdev_to_repr(netdev); - if (ice_check_vf_ready_for_cfg(repr->vf)) + if (repr->ops.ready(repr)) return; __ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi); @@ -4673,10 +4670,10 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, if (err) return err; - fec_stats->uncorrectable_blocks.total = (fec_corr_high_val << 16) + - fec_corr_low_val; - fec_stats->corrected_blocks.total = (fec_uncorr_high_val << 16) + - fec_uncorr_low_val; + fec_stats->corrected_blocks.total = (fec_corr_high_val << 16) + + fec_corr_low_val; + fec_stats->uncorrectable_blocks.total = (fec_uncorr_high_val << 16) + + fec_uncorr_low_val; return 0; } @@ -4725,6 +4722,7 @@ static const struct ethtool_ops ice_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_RX_USECS_HIGH, .cap_rss_sym_xor_supported = true, + .rxfh_per_ctx_key = true, .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, .get_fec_stats = ice_get_fec_stats, diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 20d5db88c99f..ed95072ca6e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -2981,6 +2981,50 @@ ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype, } /** + * ice_disable_fd_swap - set registers appropriately to disable FD SWAP + * @hw: pointer to the HW struct + * @prof_id: profile ID + */ +static void +ice_disable_fd_swap(struct ice_hw *hw, u8 prof_id) +{ + u16 swap_val, fvw_num; + unsigned int i; + + swap_val = ICE_SWAP_VALID; + fvw_num = hw->blk[ICE_BLK_FD].es.fvw / ICE_FDIR_REG_SET_SIZE; + + /* Since the SWAP flag in the programming descriptor doesn't work, + * disable the SWAP option instead by programming the relevant SWAP + * and INSET register sets directly.
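+ * For example, with ICE_FDIR_REG_SET_SIZE == 4 each 32-bit GLQF_FDSWAP + * register packs four consecutive swap indexes (ICE_SWAP_VALID, + * ICE_SWAP_VALID + 1, ...), one per byte, so every extracted word keeps + * its own position and no paired src/dst fields are swapped; the + * matching GLQF_FDINSET bytes are all filled with ICE_INSET_DFLT.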
+ */ + for (i = 0; i < fvw_num; i++) { + u32 raw_swap, raw_in; + unsigned int j; + + raw_swap = 0; + raw_in = 0; + + for (j = 0; j < ICE_FDIR_REG_SET_SIZE; j++) { + raw_swap |= (swap_val++) << (j * BITS_PER_BYTE); + raw_in |= ICE_INSET_DFLT << (j * BITS_PER_BYTE); + } + + /* write the FDIR swap register set */ + wr32(hw, GLQF_FDSWAP(prof_id, i), raw_swap); + + ice_debug(hw, ICE_DBG_INIT, "swap wr(%d, %d): 0x%x = 0x%08x\n", + prof_id, i, GLQF_FDSWAP(prof_id, i), raw_swap); + + /* write the FDIR inset register set */ + wr32(hw, GLQF_FDINSET(prof_id, i), raw_in); + + ice_debug(hw, ICE_DBG_INIT, "inset wr(%d, %d): 0x%x = 0x%08x\n", + prof_id, i, GLQF_FDINSET(prof_id, i), raw_in); + } +} + +/** * ice_add_prof - add profile * @hw: pointer to the HW struct * @blk: hardware block @@ -2991,6 +3035,7 @@ ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype, * @es: extraction sequence (length of array is determined by the block) * @masks: mask for extraction sequence * @symm: symmetric setting for RSS profiles + * @fd_swap: enable/disable FDIR paired src/dst fields swap option * * This function registers a profile, which matches a set of PTYPES with a * particular extraction sequence. While the hardware profile is allocated @@ -3000,7 +3045,7 @@ ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype, int ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], const struct ice_ptype_attributes *attr, u16 attr_cnt, - struct ice_fv_word *es, u16 *masks, bool symm) + struct ice_fv_word *es, u16 *masks, bool symm, bool fd_swap) { u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE); DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); @@ -3020,7 +3065,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], status = ice_alloc_prof_id(hw, blk, &prof_id); if (status) goto err_ice_add_prof; - if (blk == ICE_BLK_FD) { + if (blk == ICE_BLK_FD && fd_swap) { /* For Flow Director block, the extraction sequence may * need to be altered in the case where there are paired * fields that have no match. This is necessary because @@ -3031,6 +3076,8 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], status = ice_update_fd_swap(hw, prof_id, es); if (status) goto err_ice_add_prof; + } else if (blk == ICE_BLK_FD) { + ice_disable_fd_swap(hw, prof_id); } status = ice_update_prof_masking(hw, blk, prof_id, masks); if (status) @@ -4099,6 +4146,54 @@ err_ice_add_prof_id_flow: } /** + * ice_flow_assoc_fdir_prof - add an FDIR profile for main/ctrl VSI + * @hw: pointer to the HW struct + * @blk: HW block + * @dest_vsi: dest VSI + * @fdir_vsi: fdir programming VSI + * @hdl: profile handle + * + * Update the hardware tables to enable the FDIR profile indicated by @hdl for + * the VSI specified by @dest_vsi. On success, the flow will be enabled. + * + * Return: 0 on success or negative errno on failure.
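+ * If enabling the flow on the ctrl VSI fails, the profile that was just + * added to the main VSI is removed again before returning.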
+ */ +int +ice_flow_assoc_fdir_prof(struct ice_hw *hw, enum ice_block blk, + u16 dest_vsi, u16 fdir_vsi, u64 hdl) +{ + u16 vsi_num; + int status; + + if (blk != ICE_BLK_FD) + return -EINVAL; + + vsi_num = ice_get_hw_vsi_num(hw, dest_vsi); + status = ice_add_prof_id_flow(hw, blk, vsi_num, hdl); + if (status) { + ice_debug(hw, ICE_DBG_FLOW, "Adding HW profile failed for main VSI flow entry: %d\n", + status); + return status; + } + + vsi_num = ice_get_hw_vsi_num(hw, fdir_vsi); + status = ice_add_prof_id_flow(hw, blk, vsi_num, hdl); + if (status) { + ice_debug(hw, ICE_DBG_FLOW, "Adding HW profile failed for ctrl VSI flow entry: %d\n", + status); + goto err; + } + + return 0; + +err: + vsi_num = ice_get_hw_vsi_num(hw, dest_vsi); + ice_rem_prof_id_flow(hw, blk, vsi_num, hdl); + + return status; +} + +/** * ice_rem_prof_from_list - remove a profile from list * @hw: pointer to the HW struct * @lst: list to remove the profile from diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index b39d7cdc381f..90b9b0993122 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -6,6 +6,8 @@ #include "ice_type.h" +#define ICE_FDIR_REG_SET_SIZE 4 + int ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access); void ice_release_change_lock(struct ice_hw *hw); @@ -42,13 +44,16 @@ bool ice_hw_ptype_ena(struct ice_hw *hw, u16 ptype); int ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], const struct ice_ptype_attributes *attr, u16 attr_cnt, - struct ice_fv_word *es, u16 *masks, bool symm); + struct ice_fv_word *es, u16 *masks, bool symm, bool fd_swap); struct ice_prof_map * ice_search_prof_id(struct ice_hw *hw, enum ice_block blk, u64 id); int ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl); int ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl); +int +ice_flow_assoc_fdir_prof(struct ice_hw *hw, enum ice_block blk, + u16 dest_vsi, u16 fdir_vsi, u64 hdl); enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len); enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len); diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index fc2b58f56279..d97b751052f2 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -409,6 +409,29 @@ static const u32 ice_ptypes_gtpc_tid[] = { }; /* Packet types for GTPU */ +static const struct ice_ptype_attributes ice_attr_gtpu_session[] = { + { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV4_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_SESSION }, + { 
ICE_MAC_IPV4_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_SESSION }, + { ICE_MAC_IPV6_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_SESSION }, +}; + static const struct ice_ptype_attributes ice_attr_gtpu_eh[] = { { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH }, { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, @@ -1400,7 +1423,7 @@ ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk, /* Add a HW profile for this flow profile */ status = ice_add_prof(hw, blk, prof_id, (u8 *)params->ptypes, params->attr, params->attr_cnt, params->es, - params->mask, symm); + params->mask, symm, true); if (status) { ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n"); goto out; @@ -1523,6 +1546,90 @@ ice_flow_disassoc_prof(struct ice_hw *hw, enum ice_block blk, return status; } +#define FLAG_GTP_EH_PDU_LINK BIT_ULL(13) +#define FLAG_GTP_EH_PDU BIT_ULL(14) + +#define HI_BYTE_IN_WORD GENMASK(15, 8) +#define LO_BYTE_IN_WORD GENMASK(7, 0) + +#define FLAG_GTPU_MSK \ + (FLAG_GTP_EH_PDU | FLAG_GTP_EH_PDU_LINK) +#define FLAG_GTPU_UP \ + (FLAG_GTP_EH_PDU | FLAG_GTP_EH_PDU_LINK) +#define FLAG_GTPU_DW FLAG_GTP_EH_PDU + +/** + * ice_flow_set_parser_prof - Set flow profile based on the parsed profile info + * @hw: pointer to the HW struct + * @dest_vsi: dest VSI + * @fdir_vsi: fdir programming VSI + * @prof: stores parsed profile info from raw flow + * @blk: classification blk + * + * Return: 0 on success or negative errno on failure. + */ +int +ice_flow_set_parser_prof(struct ice_hw *hw, u16 dest_vsi, u16 fdir_vsi, + struct ice_parser_profile *prof, enum ice_block blk) +{ + u64 id = find_first_bit(prof->ptypes, ICE_FLOW_PTYPE_MAX); + struct ice_flow_prof_params *params __free(kfree); + u8 fv_words = hw->blk[blk].es.fvw; + int status; + int i, idx; + + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + for (i = 0; i < ICE_MAX_FV_WORDS; i++) { + params->es[i].prot_id = ICE_PROT_INVALID; + params->es[i].off = ICE_FV_OFFSET_INVAL; + } + + for (i = 0; i < prof->fv_num; i++) { + if (hw->blk[blk].es.reverse) + idx = fv_words - i - 1; + else + idx = i; + params->es[idx].prot_id = prof->fv[i].proto_id; + params->es[idx].off = prof->fv[i].offset; + params->mask[idx] = (((prof->fv[i].msk) << BITS_PER_BYTE) & + HI_BYTE_IN_WORD) | + (((prof->fv[i].msk) >> BITS_PER_BYTE) & + LO_BYTE_IN_WORD); + } + + switch (prof->flags) { + case FLAG_GTPU_DW: + params->attr = ice_attr_gtpu_down; + params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_down); + break; + case FLAG_GTPU_UP: + params->attr = ice_attr_gtpu_up; + params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_up); + break; + default: + if (prof->flags_msk & FLAG_GTPU_MSK) { + params->attr = ice_attr_gtpu_session; + params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_session); + } + break; + } + + status = ice_add_prof(hw, blk, id, (u8 *)prof->ptypes, + params->attr, params->attr_cnt, + params->es, params->mask, false, false); + if (status) + return status; + + status = ice_flow_assoc_fdir_prof(hw, blk, dest_vsi, fdir_vsi, id); + if (status) + ice_rem_prof(hw, blk, id); + + return status; +} + /** * ice_flow_add_prof - Add a flow profile for packet segments and matched fields * @hw: pointer to the HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h 
index 2fd2e0cb483d..6cb7bb879c98 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.h +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -5,6 +5,7 @@ #define _ICE_FLOW_H_ #include "ice_flex_type.h" +#include "ice_parser.h" #define ICE_FLOW_ENTRY_HANDLE_INVAL 0 #define ICE_FLOW_FLD_OFF_INVAL 0xffff @@ -326,6 +327,7 @@ enum ice_rss_cfg_hdr_type { ICE_RSS_ANY_HEADERS }; +struct ice_vsi; struct ice_rss_hash_cfg { u32 addl_hdrs; /* protocol header fields */ u64 hash_flds; /* hash bit field (ICE_FLOW_HASH_*) to configure */ @@ -445,6 +447,9 @@ ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir, bool symm, struct ice_flow_prof **prof); int ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id); int +ice_flow_set_parser_prof(struct ice_hw *hw, u16 dest_vsi, u16 fdir_vsi, + struct ice_parser_profile *prof, enum ice_block blk); +int ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id, u64 entry_id, u16 vsi, enum ice_flow_priority prio, void *data, u64 *entry_h); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f559e60992fa..06e712cdc3d9 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -7,6 +7,7 @@ #include "ice_lib.h" #include "ice_fltr.h" #include "ice_dcb_lib.h" +#include "ice_type.h" #include "ice_vsi_vlan_ops.h" /** @@ -20,6 +21,8 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type) return "ICE_VSI_PF"; case ICE_VSI_VF: return "ICE_VSI_VF"; + case ICE_VSI_SF: + return "ICE_VSI_SF"; case ICE_VSI_CTRL: return "ICE_VSI_CTRL"; case ICE_VSI_CHNL: @@ -135,6 +138,7 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) { switch (vsi->type) { case ICE_VSI_PF: + case ICE_VSI_SF: case ICE_VSI_CTRL: case ICE_VSI_LB: /* a user could change the values of num_[tr]x_desc using @@ -201,6 +205,12 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) max_t(int, vsi->alloc_rxq, vsi->alloc_txq)); break; + case ICE_VSI_SF: + vsi->alloc_txq = 1; + vsi->alloc_rxq = 1; + vsi->num_q_vectors = 1; + vsi->irq_dyn_alloc = true; + break; case ICE_VSI_VF: if (vf->num_req_qs) vf->num_vf_qs = vf->num_req_qs; @@ -423,7 +433,7 @@ err_out: * This deallocates the VSI's queue resources, removes it from the PF's * VSI array if necessary, and deallocates the VSI */ -static void ice_vsi_free(struct ice_vsi *vsi) +void ice_vsi_free(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; struct device *dev; @@ -447,6 +457,7 @@ static void ice_vsi_free(struct ice_vsi *vsi) ice_vsi_free_stats(vsi); ice_vsi_free_arrays(vsi); + mutex_destroy(&vsi->xdp_state_lock); mutex_unlock(&pf->sw_mutex); devm_kfree(dev, vsi); } @@ -558,6 +569,7 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch) switch (vsi->type) { case ICE_VSI_PF: + case ICE_VSI_SF: /* Setup default MSIX irq handler for VSI */ vsi->irq_handler = ice_msix_clean_rings; break; @@ -594,7 +606,7 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch) * * returns a pointer to a VSI on success, NULL on failure. 
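* Since ice_vsi_alloc() is now exposed outside ice_lib.c, the caller is * responsible for releasing the returned VSI with ice_vsi_free().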
*/ -static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf) +struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; @@ -626,6 +638,8 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf) pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, pf->next_vsi); + mutex_init(&vsi->xdp_state_lock); + unlock_pf: mutex_unlock(&pf->sw_mutex); return vsi; @@ -886,6 +900,11 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) max_rss_size); vsi->rss_lut_type = ICE_LUT_PF; break; + case ICE_VSI_SF: + vsi->rss_table_size = ICE_LUT_VSI_SIZE; + vsi->rss_size = min_t(u16, num_online_cpus(), max_rss_size); + vsi->rss_lut_type = ICE_LUT_VSI; + break; case ICE_VSI_VF: /* VF VSI will get a small RSS table. * For VSI_LUT, LUT size should be set to 64 bytes. @@ -1133,6 +1152,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; break; case ICE_VSI_VF: + case ICE_VSI_SF: /* VF VSI gets a small RSS table which is a VSI LUT type */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; break; @@ -1211,6 +1231,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, u32 vsi_flags) case ICE_VSI_PF: ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; + case ICE_VSI_SF: case ICE_VSI_CHNL: ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2; break; @@ -2092,6 +2113,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) case ICE_VSI_CHNL: case ICE_VSI_LB: case ICE_VSI_PF: + case ICE_VSI_SF: max_agg_nodes = ICE_MAX_PF_AGG_NODES; agg_node_id_start = ICE_PF_AGG_NODE_ID_START; agg_node_iter = &pf->pf_agg_node[0]; @@ -2261,6 +2283,7 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi) switch (vsi->type) { case ICE_VSI_CTRL: + case ICE_VSI_SF: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); if (ret) @@ -2286,9 +2309,6 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi) ice_vsi_map_rings_to_vectors(vsi); - /* Associate q_vector rings to napi */ - ice_vsi_set_napi_queues(vsi); - vsi->stat_offsets_loaded = false; /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ @@ -2413,20 +2433,13 @@ void ice_vsi_decfg(struct ice_vsi *vsi) struct ice_pf *pf = vsi->back; int err; - /* The Rx rule will only exist to remove if the LLDP FW - * engine is currently stopped - */ - if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF && - !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) - ice_cfg_sw_lldp(vsi, false, false); - ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); if (err) dev_err(ice_pf_to_dev(pf), "Failed to remove RDMA scheduler config for VSI %u, err %d\n", vsi->vsi_num, err); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) /* return value check can be skipped here, it always returns * 0 if reset is in progress */ @@ -2528,7 +2541,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) for (q = 0; q < q_vector->num_ring_tx; q++) { ice_write_itr(&q_vector->tx, 0); wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); - if (ice_is_xdp_ena_vsi(vsi)) { + if (vsi->xdp_rings) { u32 xdp_txq = txq + vsi->num_xdp_txq; wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0); @@ -2628,6 +2641,7 @@ void ice_vsi_close(struct ice_vsi *vsi) if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) ice_down(vsi); + ice_vsi_clear_napi_queues(vsi); ice_vsi_free_irq(vsi); ice_vsi_free_tx_rings(vsi); ice_vsi_free_rx_rings(vsi); @@ -2647,7 +2661,8 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state); - if (vsi->netdev && 
(vsi->type == ICE_VSI_PF || + vsi->type == ICE_VSI_SF)) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock(); @@ -2671,143 +2686,99 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) */ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) { - if (test_bit(ICE_VSI_DOWN, vsi->state)) - return; + bool already_down = test_bit(ICE_VSI_DOWN, vsi->state); set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); - if (vsi->type == ICE_VSI_PF && vsi->netdev) { + if (vsi->netdev && (vsi->type == ICE_VSI_PF || + vsi->type == ICE_VSI_SF)) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock(); - - ice_vsi_close(vsi); + already_down = test_bit(ICE_VSI_DOWN, vsi->state); + if (!already_down) + ice_vsi_close(vsi); if (!locked) rtnl_unlock(); - } else { + } else if (!already_down) { ice_vsi_close(vsi); } - } else if (vsi->type == ICE_VSI_CTRL) { + } else if (vsi->type == ICE_VSI_CTRL && !already_down) { ice_vsi_close(vsi); } } /** - * __ice_queue_set_napi - Set the napi instance for the queue - * @dev: device to which NAPI and queue belong - * @queue_index: Index of queue - * @type: queue type as RX or TX - * @napi: NAPI context - * @locked: is the rtnl_lock already held - * - * Set the napi instance for the queue. Caller indicates the lock status. - */ -static void -__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi, - bool locked) -{ - if (!locked) - rtnl_lock(); - netif_queue_set_napi(dev, queue_index, type, napi); - if (!locked) - rtnl_unlock(); -} - -/** - * ice_queue_set_napi - Set the napi instance for the queue - * @vsi: VSI being configured - * @queue_index: Index of queue - * @type: queue type as RX or TX - * @napi: NAPI context + * ice_vsi_set_napi_queues - associate netdev queues with napi + * @vsi: VSI pointer * - * Set the napi instance for the queue. The rtnl lock state is derived from the - * execution path. + * Associate queue[s] with napi for all vectors. + * The caller must hold rtnl_lock. */ -void -ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi) +void ice_vsi_set_napi_queues(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; + struct net_device *netdev = vsi->netdev; + int q_idx, v_idx; - if (!vsi->netdev) + if (!netdev) return; - if (current_work() == &pf->serv_task || - test_bit(ICE_PREPARED_FOR_RESET, pf->state) || - test_bit(ICE_DOWN, pf->state) || - test_bit(ICE_SUSPENDED, pf->state)) - __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, - false); - else - __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, - true); -} + ice_for_each_rxq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, + &vsi->rx_rings[q_idx]->q_vector->napi); -/** - * __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi - * @q_vector: q_vector pointer - * @locked: is the rtnl_lock already held - * - * Associate the q_vector napi with all the queue[s] on the vector. - * Caller indicates the lock status. 
- */ -void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) -{ - struct ice_rx_ring *rx_ring; - struct ice_tx_ring *tx_ring; - - ice_for_each_rx_ring(rx_ring, q_vector->rx) - __ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, &q_vector->napi, - locked); - - ice_for_each_tx_ring(tx_ring, q_vector->tx) - __ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, &q_vector->napi, - locked); + ice_for_each_txq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_TX, + &vsi->tx_rings[q_idx]->q_vector->napi); /* Also set the interrupt number for the NAPI */ - netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + + netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); + } } /** - * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi - * @q_vector: q_vector pointer + * ice_vsi_clear_napi_queues - dissociate netdev queues from napi + * @vsi: VSI pointer * - * Associate the q_vector napi with all the queue[s] on the vector + * Clear the association between all VSI queues and napi. + * The caller must hold rtnl_lock. */ -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector) +void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) { - struct ice_rx_ring *rx_ring; - struct ice_tx_ring *tx_ring; + struct net_device *netdev = vsi->netdev; + int q_idx; - ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_queue_set_napi(q_vector->vsi, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, &q_vector->napi); + if (!netdev) + return; - ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_queue_set_napi(q_vector->vsi, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, &q_vector->napi); - /* Also set the interrupt number for the NAPI */ - netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); + ice_for_each_txq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_TX, NULL); + + ice_for_each_rxq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, NULL); } /** - * ice_vsi_set_napi_queues - * @vsi: VSI pointer + * ice_napi_add - register NAPI handler for the VSI + * @vsi: VSI for which NAPI handler is to be registered * - * Associate queue[s] with napi for all vectors + * This function is only called in the driver's load path. Registering the NAPI + * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume, + * reset/rebuild, etc.)
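+ * netif_napi_add() only registers the poll handlers here; the + * queue-to-NAPI mapping is set up separately via ice_vsi_set_napi_queues() + * once the netdev queues exist.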
*/ -void ice_vsi_set_napi_queues(struct ice_vsi *vsi) +void ice_napi_add(struct ice_vsi *vsi) { - int i; + int v_idx; if (!vsi->netdev) return; - ice_for_each_q_vector(vsi, i) - ice_q_vector_set_napi_queues(vsi->q_vectors[i]); + ice_for_each_q_vector(vsi, v_idx) + netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, + ice_napi_poll); } /** @@ -2828,6 +2799,14 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_rss_clean(vsi); ice_vsi_close(vsi); + + /* The Rx rule will only exist to remove if the LLDP FW + * engine is currently stopped + */ + if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF && + !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) + ice_cfg_sw_lldp(vsi, false, false); + ice_vsi_decfg(vsi); /* retain SW VSI data structure since it is needed to unregister and @@ -3039,19 +3018,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; + mutex_lock(&vsi->xdp_state_lock); + ret = ice_vsi_realloc_stat_arrays(vsi); if (ret) - goto err_vsi_cfg; + goto unlock; ice_vsi_decfg(vsi); ret = ice_vsi_cfg_def(vsi); if (ret) - goto err_vsi_cfg; + goto unlock; coalesce = kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (!coalesce) - return -ENOMEM; + if (!coalesce) { + ret = -ENOMEM; + goto decfg; + } prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); @@ -3059,22 +3042,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (ret) { if (vsi_flags & ICE_VSI_FLAG_INIT) { ret = -EIO; - goto err_vsi_cfg_tc_lan; + goto free_coalesce; } - kfree(coalesce); - return ice_schedule_reset(pf, ICE_RESET_PFR); + ret = ice_schedule_reset(pf, ICE_RESET_PFR); + goto free_coalesce; } ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); - kfree(coalesce); - - return 0; + clear_bit(ICE_VSI_REBUILD_PENDING, vsi->state); -err_vsi_cfg_tc_lan: - ice_vsi_decfg(vsi); +free_coalesce: kfree(coalesce); -err_vsi_cfg: +decfg: + if (ret) + ice_vsi_decfg(vsi); +unlock: + mutex_unlock(&vsi->xdp_state_lock); return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 94ce8964dda6..1a6cfc8693ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -44,15 +44,10 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params); -void -ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi); - -void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); - -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector); - void ice_vsi_set_napi_queues(struct ice_vsi *vsi); +void ice_napi_add(struct ice_vsi *vsi); + +void ice_vsi_clear_napi_queues(struct ice_vsi *vsi); int ice_vsi_release(struct ice_vsi *vsi); @@ -65,6 +60,8 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked); int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags); int ice_vsi_cfg(struct ice_vsi *vsi); +struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf); +void ice_vsi_free(struct ice_vsi *vsi); bool ice_is_reset_in_progress(unsigned long *state); int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ec636be4d17d..eeb48cc48e08 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -15,6 +15,7 
@@ #include "ice_dcb_nl.h" #include "devlink/devlink.h" #include "devlink/devlink_port.h" +#include "ice_sf_eth.h" #include "ice_hwmon.h" /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the * ice tracepoint functions. This must be done exactly once across the @@ -559,6 +560,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + synchronize_irq(pf->oicr_irq.virq); + ice_unplug_aux_dev(pf); /* Notify VFs of impending reset */ @@ -606,11 +609,15 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt)); } } + + if (vsi->netdev) + netif_device_detach(vsi->netdev); skip: /* clear SW filtering DB */ ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ + set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state); ice_pf_dis_all_vsi(pf, false); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) @@ -2948,7 +2955,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) { struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; - if (rx_ring->xsk_pool) + if (READ_ONCE(rx_ring->xsk_pool)) napi_schedule(&rx_ring->q_vector->napi); } } @@ -2968,6 +2975,9 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) if (avail < cpus / 2) return -ENOMEM; + if (vsi->type == ICE_VSI_SF) + avail = vsi->alloc_txq; + vsi->num_xdp_txq = min_t(u16, avail, cpus); if (vsi->num_xdp_txq < cpus) @@ -2999,8 +3009,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack) { unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; - bool if_running = netif_running(vsi->netdev); int ret = 0, xdp_ring_err = 0; + bool if_running; if (prog && !prog->aux->xdp_has_frags) { if (frame_size > ice_max_xdp_frame_size(vsi)) { @@ -3011,13 +3021,17 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } /* hot swap progs and avoid toggling link */ - if (ice_is_xdp_ena_vsi(vsi) == !!prog) { + if (ice_is_xdp_ena_vsi(vsi) == !!prog || + test_bit(ICE_VSI_REBUILD_PENDING, vsi->state)) { ice_vsi_assign_bpf_prog(vsi, prog); return 0; } + if_running = netif_running(vsi->netdev) && + !test_and_set_bit(ICE_VSI_DOWN, vsi->state); + /* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { + if (if_running) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); @@ -3079,25 +3093,32 @@ static int ice_xdp_safe_mode(struct net_device __always_unused *dev, * @dev: netdevice * @xdp: XDP command */ -static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) +int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct ice_netdev_priv *np = netdev_priv(dev); struct ice_vsi *vsi = np->vsi; + int ret; - if (vsi->type != ICE_VSI_PF) { - NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI"); + if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_SF) { + NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF or SF VSI"); return -EINVAL; } + mutex_lock(&vsi->xdp_state_lock); + switch (xdp->command) { case XDP_SETUP_PROG: - return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + break; case XDP_SETUP_XSK_POOL: - return ice_xsk_pool_setup(vsi, xdp->xsk.pool, - xdp->xsk.queue_id); + ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); + break; 
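+ /* both commands above run under vsi->xdp_state_lock, which + * serializes them against a concurrent ice_vsi_rebuild() + */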
default: - return -EINVAL; + ret = -EINVAL; } + + mutex_unlock(&vsi->xdp_state_lock); + return ret; } /** @@ -3539,28 +3560,6 @@ skip_req_irq: } /** - * ice_napi_add - register NAPI handler for the VSI - * @vsi: VSI for which NAPI handler is to be registered - * - * This function is only called in the driver's load path. Registering the NAPI - * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume, - * reset/rebuild, etc.) - */ -static void ice_napi_add(struct ice_vsi *vsi) -{ - int v_idx; - - if (!vsi->netdev) - return; - - ice_for_each_q_vector(vsi, v_idx) { - netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, - ice_napi_poll); - __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); - } -} - -/** * ice_set_ops - set netdev and ethtools ops for the given netdev * @vsi: the VSI associated with the new netdev */ @@ -3593,7 +3592,7 @@ static void ice_set_ops(struct ice_vsi *vsi) * ice_set_netdev_features - set features for the given netdev * @netdev: netdev instance */ -static void ice_set_netdev_features(struct net_device *netdev) +void ice_set_netdev_features(struct net_device *netdev) { struct ice_pf *pf = ice_netdev_to_pf(netdev); bool is_dvm_ena = ice_is_dvm_ena(&pf->hw); @@ -3775,8 +3774,7 @@ ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) * * net_device_ops implementation for adding VLAN IDs */ -static int -ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) +int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi_vlan_ops *vlan_ops; @@ -3838,8 +3836,7 @@ finish: * * net_device_ops implementation for removing VLAN IDs */ -static int -ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) +int ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi_vlan_ops *vlan_ops; @@ -4008,6 +4005,9 @@ static void ice_deinit_pf(struct ice_pf *pf) if (pf->ptp.clock) ptp_clock_unregister(pf->ptp.clock); + + xa_destroy(&pf->dyn_ports); + xa_destroy(&pf->sf_nums); } /** @@ -4101,6 +4101,9 @@ static int ice_init_pf(struct ice_pf *pf) hash_init(pf->vfs.table); ice_mbx_init_snapshot(&pf->hw); + xa_init(&pf->dyn_ports); + xa_init(&pf->sf_nums); + return 0; } @@ -5348,7 +5351,6 @@ err_load: ice_deinit(pf); err_init: ice_adapter_put(pdev); - pci_disable_device(pdev); return err; } @@ -5443,6 +5445,7 @@ static void ice_remove(struct pci_dev *pdev) ice_remove_arfs(pf); devl_lock(priv_to_devlink(pf)); + ice_dealloc_all_dynamic_ports(pf); ice_deinit_devlink(pf); ice_unload(pf); @@ -5455,7 +5458,6 @@ static void ice_remove(struct pci_dev *pdev) ice_set_wake(pf); ice_adapter_put(pdev); - pci_disable_device(pdev); } /** @@ -5535,7 +5537,9 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) if (ret) goto err_reinit; ice_vsi_map_rings_to_vectors(pf->vsi[v]); + rtnl_lock(); ice_vsi_set_napi_queues(pf->vsi[v]); + rtnl_unlock(); } ret = ice_req_irq_msix_misc(pf); @@ -5549,8 +5553,12 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) err_reinit: while (v--) - if (pf->vsi[v]) + if (pf->vsi[v]) { + rtnl_lock(); + ice_vsi_clear_napi_queues(pf->vsi[v]); + rtnl_unlock(); ice_vsi_free_q_vectors(pf->vsi[v]); + } return ret; } @@ -5615,6 +5623,9 @@ static int ice_suspend(struct device *dev) ice_for_each_vsi(pf, v) { if (!pf->vsi[v]) continue; + rtnl_lock(); + ice_vsi_clear_napi_queues(pf->vsi[v]); + rtnl_unlock(); ice_vsi_free_q_vectors(pf->vsi[v]); } 
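/* with every VSI's q_vectors freed, the interrupt scheme can now be * torn down */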
ice_clear_interrupt_scheme(pf); @@ -5922,8 +5933,16 @@ static int __init ice_module_init(void) goto err_dest_lag_wq; } + status = ice_sf_driver_register(); + if (status) { + pr_err("Failed to register SF driver, err %d\n", status); + goto err_sf_driver; + } + return 0; +err_sf_driver: + pci_unregister_driver(&ice_driver); err_dest_lag_wq: destroy_workqueue(ice_lag_wq); ice_debugfs_exit(); @@ -5941,6 +5960,7 @@ module_init(ice_module_init); */ static void __exit ice_module_exit(void) { + ice_sf_driver_unregister(); pci_unregister_driver(&ice_driver); ice_debugfs_exit(); destroy_workqueue(ice_wq); @@ -6742,7 +6762,8 @@ static int ice_up_complete(struct ice_vsi *vsi) if (vsi->port_info && (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) && - vsi->netdev && vsi->type == ICE_VSI_PF) { + ((vsi->netdev && (vsi->type == ICE_VSI_PF || + vsi->type == ICE_VSI_SF)))) { ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); @@ -7100,7 +7121,6 @@ void ice_update_pf_stats(struct ice_pf *pf) * @netdev: network interface device structure * @stats: main device statistics structure */ -static void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct ice_netdev_priv *np = netdev_priv(netdev); @@ -7228,7 +7248,7 @@ int ice_down(struct ice_vsi *vsi) if (tx_err) netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", vsi->vsi_num, tx_err); - if (!tx_err && ice_is_xdp_ena_vsi(vsi)) { + if (!tx_err && vsi->xdp_rings) { tx_err = ice_vsi_stop_xdp_tx_rings(vsi); if (tx_err) netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n", @@ -7245,7 +7265,7 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) ice_for_each_xdp_txq(vsi, i) ice_clean_tx_ring(vsi->xdp_rings[i]); @@ -7441,7 +7461,7 @@ int ice_vsi_open(struct ice_vsi *vsi) ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); - if (vsi->type == ICE_VSI_PF) { + if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_SF) { /* Notify the stack of the actual queue counts. 
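* As of this change the notification covers both PF and SF VSIs, and it * must happen before the queues are started.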
*/ err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); if (err) @@ -7450,6 +7470,8 @@ int ice_vsi_open(struct ice_vsi *vsi) err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); if (err) goto err_set_qs; + + ice_vsi_set_napi_queues(vsi); } err = ice_up_complete(vsi); @@ -7587,6 +7609,7 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf) */ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) { + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; bool dvm; @@ -7729,6 +7752,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_rebuild_arfs(pf); } + if (vsi && vsi->netdev) + netif_device_attach(vsi->netdev); + ice_update_pf_netdev_link(pf); /* tell the firmware we are up */ @@ -7771,7 +7797,7 @@ clear_recovery: * * Returns 0 on success, negative on failure */ -static int ice_change_mtu(struct net_device *netdev, int new_mtu) +int ice_change_mtu(struct net_device *netdev, int new_mtu) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -8195,7 +8221,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, * @netdev: network interface device structure * @txqueue: Tx queue */ -static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) +void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_tx_ring *tx_ring = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h index a2562f04267f..b9f383494b3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_osdep.h +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h @@ -12,6 +12,7 @@ #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> +#include <linux/iopoll.h> #include <linux/pci_ids.h> #ifndef CONFIG_64BIT #include <linux/io-64-nonatomic-lo-hi.h> @@ -23,6 +24,9 @@ #define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) #define rd64(a, reg) readq((a)->hw_addr + (reg)) +#define rd32_poll_timeout(a, addr, val, cond, delay_us, timeout_us) \ + read_poll_timeout(rd32, val, cond, delay_us, timeout_us, false, a, addr) + #define ice_flush(a) rd32((a), GLGEN_STAT) #define ICE_M(m, s) ((m ## U) << (s)) @@ -39,11 +43,10 @@ struct device *ice_hw_to_dev(struct ice_hw *hw); #define ice_debug(hw, type, fmt, args...) \ dev_dbg(ice_hw_to_dev(hw), fmt, ##args) -#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ - print_hex_dump_debug(KBUILD_MODNAME " ", \ - DUMP_PREFIX_OFFSET, rowsize, \ - groupsize, buf, len, false) -#else +#define _ice_debug_array(hw, type, prefix, rowsize, groupsize, buf, len) \ + print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, \ + rowsize, groupsize, buf, len, false) +#else /* CONFIG_DYNAMIC_DEBUG */ #define ice_debug(hw, type, fmt, args...) 
\ do { \ if ((type) & (hw)->debug_mask) \ @@ -51,16 +54,15 @@ do { \ } while (0) #ifdef DEBUG -#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ +#define _ice_debug_array(hw, type, prefix, rowsize, groupsize, buf, len) \ do { \ if ((type) & (hw)->debug_mask) \ - print_hex_dump_debug(KBUILD_MODNAME, \ - DUMP_PREFIX_OFFSET, \ + print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET,\ rowsize, groupsize, buf, \ len, false); \ } while (0) -#else -#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ +#else /* DEBUG */ +#define _ice_debug_array(hw, type, prefix, rowsize, groupsize, buf, len) \ do { \ struct ice_hw *hw_l = hw; \ if ((type) & (hw_l)->debug_mask) { \ @@ -78,4 +80,10 @@ do { \ #endif /* DEBUG */ #endif /* CONFIG_DYNAMIC_DEBUG */ +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ + _ice_debug_array(hw, type, KBUILD_MODNAME, rowsize, groupsize, buf, len) + +#define ice_debug_array_w_prefix(hw, type, prefix, buf, len) \ + _ice_debug_array(hw, type, prefix, 16, 1, buf, len) + #endif /* _ICE_OSDEP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_parser.c b/drivers/net/ethernet/intel/ice/ice_parser.c new file mode 100644 index 000000000000..664beb64f557 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_parser.c @@ -0,0 +1,2430 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024 Intel Corporation */ + +#include "ice_common.h" + +struct ice_pkg_sect_hdr { + __le16 count; + __le16 offset; +}; + +/** + * ice_parser_sect_item_get - parse an item from a section + * @sect_type: section type + * @section: section object + * @index: index of the item to get + * @offset: dummy as prototype of ice_pkg_enum_entry's last parameter + * + * Return: a pointer to the item or NULL. + */ +static void *ice_parser_sect_item_get(u32 sect_type, void *section, + u32 index, u32 __maybe_unused *offset) +{ + size_t data_off = ICE_SEC_DATA_OFFSET; + struct ice_pkg_sect_hdr *hdr; + size_t size; + + if (!section) + return NULL; + + switch (sect_type) { + case ICE_SID_RXPARSER_IMEM: + size = ICE_SID_RXPARSER_IMEM_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_METADATA_INIT: + size = ICE_SID_RXPARSER_METADATA_INIT_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_CAM: + size = ICE_SID_RXPARSER_CAM_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_PG_SPILL: + size = ICE_SID_RXPARSER_PG_SPILL_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_NOMATCH_CAM: + size = ICE_SID_RXPARSER_NOMATCH_CAM_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_NOMATCH_SPILL: + size = ICE_SID_RXPARSER_NOMATCH_SPILL_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_BOOST_TCAM: + size = ICE_SID_RXPARSER_BOOST_TCAM_ENTRY_SIZE; + break; + case ICE_SID_LBL_RXPARSER_TMEM: + data_off = ICE_SEC_LBL_DATA_OFFSET; + size = ICE_SID_LBL_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_MARKER_PTYPE: + size = ICE_SID_RXPARSER_MARKER_TYPE_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_MARKER_GRP: + size = ICE_SID_RXPARSER_MARKER_GRP_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_PROTO_GRP: + size = ICE_SID_RXPARSER_PROTO_GRP_ENTRY_SIZE; + break; + case ICE_SID_RXPARSER_FLAG_REDIR: + size = ICE_SID_RXPARSER_FLAG_REDIR_ENTRY_SIZE; + break; + default: + return NULL; + } + + hdr = section; + if (index >= le16_to_cpu(hdr->count)) + return NULL; + + return section + data_off + index * size; +} + +/** + * ice_parser_create_table - create an item table from a section + * @hw: pointer to the hardware structure + * @sect_type: section type + * @item_size: item size in bytes + * @length: number of items in the table to create + * @parse_item: 
the function to parse the item + * @no_offset: ignore header offset, calculate index from 0 + * + * Return: a pointer to the allocated table or ERR_PTR. + */ +static void * +ice_parser_create_table(struct ice_hw *hw, u32 sect_type, + u32 item_size, u32 length, + void (*parse_item)(struct ice_hw *hw, u16 idx, + void *item, void *data, + int size), bool no_offset) +{ + struct ice_pkg_enum state = {}; + struct ice_seg *seg = hw->seg; + void *table, *data, *item; + u16 idx = 0; + + if (!seg) + return ERR_PTR(-EINVAL); + + table = kzalloc(item_size * length, GFP_KERNEL); + if (!table) + return ERR_PTR(-ENOMEM); + + do { + data = ice_pkg_enum_entry(seg, &state, sect_type, NULL, + ice_parser_sect_item_get); + seg = NULL; + if (data) { + struct ice_pkg_sect_hdr *hdr = state.sect; + + if (!no_offset) + idx = le16_to_cpu(hdr->offset) + + state.entry_idx; + + item = (void *)((uintptr_t)table + idx * item_size); + parse_item(hw, idx, item, data, item_size); + + if (no_offset) + idx++; + } + } while (data); + + return table; +} + +/*** ICE_SID_RXPARSER_IMEM section ***/ +static void ice_imem_bst_bm_dump(struct ice_hw *hw, struct ice_bst_main *bm) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "boost main:\n"); + dev_info(dev, "\talu0 = %d\n", bm->alu0); + dev_info(dev, "\talu1 = %d\n", bm->alu1); + dev_info(dev, "\talu2 = %d\n", bm->alu2); + dev_info(dev, "\tpg = %d\n", bm->pg); +} + +static void ice_imem_bst_kb_dump(struct ice_hw *hw, + struct ice_bst_keybuilder *kb) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "boost key builder:\n"); + dev_info(dev, "\tpriority = %d\n", kb->prio); + dev_info(dev, "\ttsr_ctrl = %d\n", kb->tsr_ctrl); +} + +static void ice_imem_np_kb_dump(struct ice_hw *hw, + struct ice_np_keybuilder *kb) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "next proto key builder:\n"); + dev_info(dev, "\topc = %d\n", kb->opc); + dev_info(dev, "\tstart_or_reg0 = %d\n", kb->start_reg0); + dev_info(dev, "\tlen_or_reg1 = %d\n", kb->len_reg1); +} + +static void ice_imem_pg_kb_dump(struct ice_hw *hw, + struct ice_pg_keybuilder *kb) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "parse graph key builder:\n"); + dev_info(dev, "\tflag0_ena = %d\n", kb->flag0_ena); + dev_info(dev, "\tflag1_ena = %d\n", kb->flag1_ena); + dev_info(dev, "\tflag2_ena = %d\n", kb->flag2_ena); + dev_info(dev, "\tflag3_ena = %d\n", kb->flag3_ena); + dev_info(dev, "\tflag0_idx = %d\n", kb->flag0_idx); + dev_info(dev, "\tflag1_idx = %d\n", kb->flag1_idx); + dev_info(dev, "\tflag2_idx = %d\n", kb->flag2_idx); + dev_info(dev, "\tflag3_idx = %d\n", kb->flag3_idx); + dev_info(dev, "\talu_reg_idx = %d\n", kb->alu_reg_idx); +} + +static void ice_imem_alu_dump(struct ice_hw *hw, + struct ice_alu *alu, int index) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "alu%d:\n", index); + dev_info(dev, "\topc = %d\n", alu->opc); + dev_info(dev, "\tsrc_start = %d\n", alu->src_start); + dev_info(dev, "\tsrc_len = %d\n", alu->src_len); + dev_info(dev, "\tshift_xlate_sel = %d\n", alu->shift_xlate_sel); + dev_info(dev, "\tshift_xlate_key = %d\n", alu->shift_xlate_key); + dev_info(dev, "\tsrc_reg_id = %d\n", alu->src_reg_id); + dev_info(dev, "\tdst_reg_id = %d\n", alu->dst_reg_id); + dev_info(dev, "\tinc0 = %d\n", alu->inc0); + dev_info(dev, "\tinc1 = %d\n", alu->inc1); + dev_info(dev, "\tproto_offset_opc = %d\n", alu->proto_offset_opc); + dev_info(dev, "\tproto_offset = %d\n", alu->proto_offset); + dev_info(dev, "\tbranch_addr = %d\n", alu->branch_addr); + 
dev_info(dev, "\timm = %d\n", alu->imm); + dev_info(dev, "\tdst_start = %d\n", alu->dst_start); + dev_info(dev, "\tdst_len = %d\n", alu->dst_len); + dev_info(dev, "\tflags_extr_imm = %d\n", alu->flags_extr_imm); + dev_info(dev, "\tflags_start_imm= %d\n", alu->flags_start_imm); +} + +/** + * ice_imem_dump - dump an imem item info + * @hw: pointer to the hardware structure + * @item: imem item to dump + */ +static void ice_imem_dump(struct ice_hw *hw, struct ice_imem_item *item) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "index = %d\n", item->idx); + ice_imem_bst_bm_dump(hw, &item->b_m); + ice_imem_bst_kb_dump(hw, &item->b_kb); + dev_info(dev, "pg priority = %d\n", item->pg_prio); + ice_imem_np_kb_dump(hw, &item->np_kb); + ice_imem_pg_kb_dump(hw, &item->pg_kb); + ice_imem_alu_dump(hw, &item->alu0, 0); + ice_imem_alu_dump(hw, &item->alu1, 1); + ice_imem_alu_dump(hw, &item->alu2, 2); +} + +#define ICE_IM_BM_ALU0 BIT(0) +#define ICE_IM_BM_ALU1 BIT(1) +#define ICE_IM_BM_ALU2 BIT(2) +#define ICE_IM_BM_PG BIT(3) + +/** + * ice_imem_bm_init - parse 4 bits of Boost Main + * @bm: pointer to the Boost Main structure + * @data: Boost Main data to be parsed + */ +static void ice_imem_bm_init(struct ice_bst_main *bm, u8 data) +{ + bm->alu0 = FIELD_GET(ICE_IM_BM_ALU0, data); + bm->alu1 = FIELD_GET(ICE_IM_BM_ALU1, data); + bm->alu2 = FIELD_GET(ICE_IM_BM_ALU2, data); + bm->pg = FIELD_GET(ICE_IM_BM_PG, data); +} + +#define ICE_IM_BKB_PRIO GENMASK(7, 0) +#define ICE_IM_BKB_TSR_CTRL BIT(8) + +/** + * ice_imem_bkb_init - parse 10 bits of Boost Main Build + * @bkb: pointer to the Boost Main Build structure + * @data: Boost Main Build data to be parsed + */ +static void ice_imem_bkb_init(struct ice_bst_keybuilder *bkb, u16 data) +{ + bkb->prio = FIELD_GET(ICE_IM_BKB_PRIO, data); + bkb->tsr_ctrl = FIELD_GET(ICE_IM_BKB_TSR_CTRL, data); +} + +#define ICE_IM_NPKB_OPC GENMASK(1, 0) +#define ICE_IM_NPKB_S_R0 GENMASK(9, 2) +#define ICE_IM_NPKB_L_R1 GENMASK(17, 10) + +/** + * ice_imem_npkb_init - parse 18 bits of Next Protocol Key Build + * @kb: pointer to the Next Protocol Key Build structure + * @data: Next Protocol Key Build data to be parsed + */ +static void ice_imem_npkb_init(struct ice_np_keybuilder *kb, u32 data) +{ + kb->opc = FIELD_GET(ICE_IM_NPKB_OPC, data); + kb->start_reg0 = FIELD_GET(ICE_IM_NPKB_S_R0, data); + kb->len_reg1 = FIELD_GET(ICE_IM_NPKB_L_R1, data); +} + +#define ICE_IM_PGKB_F0_ENA BIT_ULL(0) +#define ICE_IM_PGKB_F0_IDX GENMASK_ULL(6, 1) +#define ICE_IM_PGKB_F1_ENA BIT_ULL(7) +#define ICE_IM_PGKB_F1_IDX GENMASK_ULL(13, 8) +#define ICE_IM_PGKB_F2_ENA BIT_ULL(14) +#define ICE_IM_PGKB_F2_IDX GENMASK_ULL(20, 15) +#define ICE_IM_PGKB_F3_ENA BIT_ULL(21) +#define ICE_IM_PGKB_F3_IDX GENMASK_ULL(27, 22) +#define ICE_IM_PGKB_AR_IDX GENMASK_ULL(34, 28) + +/** + * ice_imem_pgkb_init - parse 35 bits of Parse Graph Key Build + * @kb: pointer to the Parse Graph Key Build structure + * @data: Parse Graph Key Build data to be parsed + */ +static void ice_imem_pgkb_init(struct ice_pg_keybuilder *kb, u64 data) +{ + kb->flag0_ena = FIELD_GET(ICE_IM_PGKB_F0_ENA, data); + kb->flag0_idx = FIELD_GET(ICE_IM_PGKB_F0_IDX, data); + kb->flag1_ena = FIELD_GET(ICE_IM_PGKB_F1_ENA, data); + kb->flag1_idx = FIELD_GET(ICE_IM_PGKB_F1_IDX, data); + kb->flag2_ena = FIELD_GET(ICE_IM_PGKB_F2_ENA, data); + kb->flag2_idx = FIELD_GET(ICE_IM_PGKB_F2_IDX, data); + kb->flag3_ena = FIELD_GET(ICE_IM_PGKB_F3_ENA, data); + kb->flag3_idx = FIELD_GET(ICE_IM_PGKB_F3_IDX, data); + kb->alu_reg_idx = FIELD_GET(ICE_IM_PGKB_AR_IDX, 
data); +} + +#define ICE_IM_ALU_OPC GENMASK_ULL(5, 0) +#define ICE_IM_ALU_SS GENMASK_ULL(13, 6) +#define ICE_IM_ALU_SL GENMASK_ULL(18, 14) +#define ICE_IM_ALU_SXS BIT_ULL(19) +#define ICE_IM_ALU_SXK GENMASK_ULL(23, 20) +#define ICE_IM_ALU_SRID GENMASK_ULL(30, 24) +#define ICE_IM_ALU_DRID GENMASK_ULL(37, 31) +#define ICE_IM_ALU_INC0 BIT_ULL(38) +#define ICE_IM_ALU_INC1 BIT_ULL(39) +#define ICE_IM_ALU_POO GENMASK_ULL(41, 40) +#define ICE_IM_ALU_PO GENMASK_ULL(49, 42) +#define ICE_IM_ALU_BA_S 50 /* offset for the 2nd 64-bits field */ +#define ICE_IM_ALU_BA GENMASK_ULL(57 - ICE_IM_ALU_BA_S, \ + 50 - ICE_IM_ALU_BA_S) +#define ICE_IM_ALU_IMM GENMASK_ULL(73 - ICE_IM_ALU_BA_S, \ + 58 - ICE_IM_ALU_BA_S) +#define ICE_IM_ALU_DFE BIT_ULL(74 - ICE_IM_ALU_BA_S) +#define ICE_IM_ALU_DS GENMASK_ULL(80 - ICE_IM_ALU_BA_S, \ + 75 - ICE_IM_ALU_BA_S) +#define ICE_IM_ALU_DL GENMASK_ULL(86 - ICE_IM_ALU_BA_S, \ + 81 - ICE_IM_ALU_BA_S) +#define ICE_IM_ALU_FEI BIT_ULL(87 - ICE_IM_ALU_BA_S) +#define ICE_IM_ALU_FSI GENMASK_ULL(95 - ICE_IM_ALU_BA_S, \ + 88 - ICE_IM_ALU_BA_S) + +/** + * ice_imem_alu_init - parse 96 bits of ALU entry + * @alu: pointer to the ALU entry structure + * @data: ALU entry data to be parsed + * @off: offset of the ALU entry data + */ +static void ice_imem_alu_init(struct ice_alu *alu, u8 *data, u8 off) +{ + u64 d64; + u8 idd; + + d64 = *((u64 *)data) >> off; + + alu->opc = FIELD_GET(ICE_IM_ALU_OPC, d64); + alu->src_start = FIELD_GET(ICE_IM_ALU_SS, d64); + alu->src_len = FIELD_GET(ICE_IM_ALU_SL, d64); + alu->shift_xlate_sel = FIELD_GET(ICE_IM_ALU_SXS, d64); + alu->shift_xlate_key = FIELD_GET(ICE_IM_ALU_SXK, d64); + alu->src_reg_id = FIELD_GET(ICE_IM_ALU_SRID, d64); + alu->dst_reg_id = FIELD_GET(ICE_IM_ALU_DRID, d64); + alu->inc0 = FIELD_GET(ICE_IM_ALU_INC0, d64); + alu->inc1 = FIELD_GET(ICE_IM_ALU_INC1, d64); + alu->proto_offset_opc = FIELD_GET(ICE_IM_ALU_POO, d64); + alu->proto_offset = FIELD_GET(ICE_IM_ALU_PO, d64); + + idd = (ICE_IM_ALU_BA_S + off) / BITS_PER_BYTE; + off = (ICE_IM_ALU_BA_S + off) % BITS_PER_BYTE; + d64 = *((u64 *)(&data[idd])) >> off; + + alu->branch_addr = FIELD_GET(ICE_IM_ALU_BA, d64); + alu->imm = FIELD_GET(ICE_IM_ALU_IMM, d64); + alu->dedicate_flags_ena = FIELD_GET(ICE_IM_ALU_DFE, d64); + alu->dst_start = FIELD_GET(ICE_IM_ALU_DS, d64); + alu->dst_len = FIELD_GET(ICE_IM_ALU_DL, d64); + alu->flags_extr_imm = FIELD_GET(ICE_IM_ALU_FEI, d64); + alu->flags_start_imm = FIELD_GET(ICE_IM_ALU_FSI, d64); +} + +#define ICE_IMEM_BM_S 0 +#define ICE_IMEM_BKB_S 4 +#define ICE_IMEM_BKB_IDD (ICE_IMEM_BKB_S / BITS_PER_BYTE) +#define ICE_IMEM_BKB_OFF (ICE_IMEM_BKB_S % BITS_PER_BYTE) +#define ICE_IMEM_PGP GENMASK(15, 14) +#define ICE_IMEM_NPKB_S 16 +#define ICE_IMEM_NPKB_IDD (ICE_IMEM_NPKB_S / BITS_PER_BYTE) +#define ICE_IMEM_NPKB_OFF (ICE_IMEM_NPKB_S % BITS_PER_BYTE) +#define ICE_IMEM_PGKB_S 34 +#define ICE_IMEM_PGKB_IDD (ICE_IMEM_PGKB_S / BITS_PER_BYTE) +#define ICE_IMEM_PGKB_OFF (ICE_IMEM_PGKB_S % BITS_PER_BYTE) +#define ICE_IMEM_ALU0_S 69 +#define ICE_IMEM_ALU0_IDD (ICE_IMEM_ALU0_S / BITS_PER_BYTE) +#define ICE_IMEM_ALU0_OFF (ICE_IMEM_ALU0_S % BITS_PER_BYTE) +#define ICE_IMEM_ALU1_S 165 +#define ICE_IMEM_ALU1_IDD (ICE_IMEM_ALU1_S / BITS_PER_BYTE) +#define ICE_IMEM_ALU1_OFF (ICE_IMEM_ALU1_S % BITS_PER_BYTE) +#define ICE_IMEM_ALU2_S 357 +#define ICE_IMEM_ALU2_IDD (ICE_IMEM_ALU2_S / BITS_PER_BYTE) +#define ICE_IMEM_ALU2_OFF (ICE_IMEM_ALU2_S % BITS_PER_BYTE) + +/** + * ice_imem_parse_item - parse 384 bits of IMEM entry + * @hw: pointer to the hardware structure + * @idx: index of IMEM entry + 
* @item: item of IMEM entry
+ * @data: IMEM entry data to be parsed
+ * @size: size of IMEM entry
+ */
+static void ice_imem_parse_item(struct ice_hw *hw, u16 idx, void *item,
+ void *data, int __maybe_unused size)
+{
+ struct ice_imem_item *ii = item;
+ u8 *buf = data;
+
+ ii->idx = idx;
+
+ ice_imem_bm_init(&ii->b_m, *(u8 *)buf);
+ ice_imem_bkb_init(&ii->b_kb,
+ *((u16 *)(&buf[ICE_IMEM_BKB_IDD])) >>
+ ICE_IMEM_BKB_OFF);
+
+ ii->pg_prio = FIELD_GET(ICE_IMEM_PGP, *(u16 *)buf);
+
+ ice_imem_npkb_init(&ii->np_kb,
+ *((u32 *)(&buf[ICE_IMEM_NPKB_IDD])) >>
+ ICE_IMEM_NPKB_OFF);
+ ice_imem_pgkb_init(&ii->pg_kb,
+ *((u64 *)(&buf[ICE_IMEM_PGKB_IDD])) >>
+ ICE_IMEM_PGKB_OFF);
+
+ ice_imem_alu_init(&ii->alu0,
+ &buf[ICE_IMEM_ALU0_IDD],
+ ICE_IMEM_ALU0_OFF);
+ ice_imem_alu_init(&ii->alu1,
+ &buf[ICE_IMEM_ALU1_IDD],
+ ICE_IMEM_ALU1_OFF);
+ ice_imem_alu_init(&ii->alu2,
+ &buf[ICE_IMEM_ALU2_IDD],
+ ICE_IMEM_ALU2_OFF);
+
+ if (hw->debug_mask & ICE_DBG_PARSER)
+ ice_imem_dump(hw, ii);
+}
+
+/**
+ * ice_imem_table_get - create an imem table
+ * @hw: pointer to the hardware structure
+ *
+ * Return: a pointer to the allocated IMEM table.
+ */
+static struct ice_imem_item *ice_imem_table_get(struct ice_hw *hw)
+{
+ return ice_parser_create_table(hw, ICE_SID_RXPARSER_IMEM,
+ sizeof(struct ice_imem_item),
+ ICE_IMEM_TABLE_SIZE,
+ ice_imem_parse_item, false);
+}
+
+/*** ICE_SID_RXPARSER_METADATA_INIT section ***/
+/**
+ * ice_metainit_dump - dump a metainit item info
+ * @hw: pointer to the hardware structure
+ * @item: metainit item to dump
+ */
+static void ice_metainit_dump(struct ice_hw *hw, struct ice_metainit_item *item)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "index = %d\n", item->idx);
+
+ dev_info(dev, "tsr = %d\n", item->tsr);
+ dev_info(dev, "ho = %d\n", item->ho);
+ dev_info(dev, "pc = %d\n", item->pc);
+ dev_info(dev, "pg_rn = %d\n", item->pg_rn);
+ dev_info(dev, "cd = %d\n", item->cd);
+
+ dev_info(dev, "gpr_a_ctrl = %d\n", item->gpr_a_ctrl);
+ dev_info(dev, "gpr_a_data_mdid = %d\n", item->gpr_a_data_mdid);
+ dev_info(dev, "gpr_a_data_start = %d\n", item->gpr_a_data_start);
+ dev_info(dev, "gpr_a_data_len = %d\n", item->gpr_a_data_len);
+ dev_info(dev, "gpr_a_id = %d\n", item->gpr_a_id);
+
+ dev_info(dev, "gpr_b_ctrl = %d\n", item->gpr_b_ctrl);
+ dev_info(dev, "gpr_b_data_mdid = %d\n", item->gpr_b_data_mdid);
+ dev_info(dev, "gpr_b_data_start = %d\n", item->gpr_b_data_start);
+ dev_info(dev, "gpr_b_data_len = %d\n", item->gpr_b_data_len);
+ dev_info(dev, "gpr_b_id = %d\n", item->gpr_b_id);
+
+ dev_info(dev, "gpr_c_ctrl = %d\n", item->gpr_c_ctrl);
+ dev_info(dev, "gpr_c_data_mdid = %d\n", item->gpr_c_data_mdid);
+ dev_info(dev, "gpr_c_data_start = %d\n", item->gpr_c_data_start);
+ dev_info(dev, "gpr_c_data_len = %d\n", item->gpr_c_data_len);
+ dev_info(dev, "gpr_c_id = %d\n", item->gpr_c_id);
+
+ dev_info(dev, "gpr_d_ctrl = %d\n", item->gpr_d_ctrl);
+ dev_info(dev, "gpr_d_data_mdid = %d\n", item->gpr_d_data_mdid);
+ dev_info(dev, "gpr_d_data_start = %d\n", item->gpr_d_data_start);
+ dev_info(dev, "gpr_d_data_len = %d\n", item->gpr_d_data_len);
+ dev_info(dev, "gpr_d_id = %d\n", item->gpr_d_id);
+
+ dev_info(dev, "flags = 0x%llx\n", (unsigned long long)(item->flags));
+}
+
+#define ICE_MI_TSR GENMASK_ULL(7, 0)
+#define ICE_MI_HO GENMASK_ULL(16, 8)
+#define ICE_MI_PC GENMASK_ULL(24, 17)
+#define ICE_MI_PGRN GENMASK_ULL(35, 25)
+#define ICE_MI_CD GENMASK_ULL(38, 36)
+#define ICE_MI_GAC BIT_ULL(39)
+#define ICE_MI_GADM GENMASK_ULL(44, 40)
+#define ICE_MI_GADS GENMASK_ULL(48, 45)
+#define ICE_MI_GADL GENMASK_ULL(53, 49) +#define ICE_MI_GAI GENMASK_ULL(59, 56) +#define ICE_MI_GBC BIT_ULL(60) +#define ICE_MI_GBDM_S 61 /* offset for the 2nd 64-bits field */ +#define ICE_MI_GBDM_IDD (ICE_MI_GBDM_S / BITS_PER_BYTE) +#define ICE_MI_GBDM_OFF (ICE_MI_GBDM_S % BITS_PER_BYTE) + +#define ICE_MI_GBDM_GENMASK_ULL(high, low) \ + GENMASK_ULL((high) - ICE_MI_GBDM_S, (low) - ICE_MI_GBDM_S) +#define ICE_MI_GBDM ICE_MI_GBDM_GENMASK_ULL(65, 61) +#define ICE_MI_GBDS ICE_MI_GBDM_GENMASK_ULL(69, 66) +#define ICE_MI_GBDL ICE_MI_GBDM_GENMASK_ULL(74, 70) +#define ICE_MI_GBI ICE_MI_GBDM_GENMASK_ULL(80, 77) +#define ICE_MI_GCC BIT_ULL(81 - ICE_MI_GBDM_S) +#define ICE_MI_GCDM ICE_MI_GBDM_GENMASK_ULL(86, 82) +#define ICE_MI_GCDS ICE_MI_GBDM_GENMASK_ULL(90, 87) +#define ICE_MI_GCDL ICE_MI_GBDM_GENMASK_ULL(95, 91) +#define ICE_MI_GCI ICE_MI_GBDM_GENMASK_ULL(101, 98) +#define ICE_MI_GDC BIT_ULL(102 - ICE_MI_GBDM_S) +#define ICE_MI_GDDM ICE_MI_GBDM_GENMASK_ULL(107, 103) +#define ICE_MI_GDDS ICE_MI_GBDM_GENMASK_ULL(111, 108) +#define ICE_MI_GDDL ICE_MI_GBDM_GENMASK_ULL(116, 112) +#define ICE_MI_GDI ICE_MI_GBDM_GENMASK_ULL(122, 119) +#define ICE_MI_FLAG_S 123 /* offset for the 3rd 64-bits field */ +#define ICE_MI_FLAG_IDD (ICE_MI_FLAG_S / BITS_PER_BYTE) +#define ICE_MI_FLAG_OFF (ICE_MI_FLAG_S % BITS_PER_BYTE) +#define ICE_MI_FLAG GENMASK_ULL(186 - ICE_MI_FLAG_S, \ + 123 - ICE_MI_FLAG_S) + +/** + * ice_metainit_parse_item - parse 192 bits of Metadata Init entry + * @hw: pointer to the hardware structure + * @idx: index of Metadata Init entry + * @item: item of Metadata Init entry + * @data: Metadata Init entry data to be parsed + * @size: size of Metadata Init entry + */ +static void ice_metainit_parse_item(struct ice_hw *hw, u16 idx, void *item, + void *data, int __maybe_unused size) +{ + struct ice_metainit_item *mi = item; + u8 *buf = data; + u64 d64; + + mi->idx = idx; + + d64 = *(u64 *)buf; + + mi->tsr = FIELD_GET(ICE_MI_TSR, d64); + mi->ho = FIELD_GET(ICE_MI_HO, d64); + mi->pc = FIELD_GET(ICE_MI_PC, d64); + mi->pg_rn = FIELD_GET(ICE_MI_PGRN, d64); + mi->cd = FIELD_GET(ICE_MI_CD, d64); + + mi->gpr_a_ctrl = FIELD_GET(ICE_MI_GAC, d64); + mi->gpr_a_data_mdid = FIELD_GET(ICE_MI_GADM, d64); + mi->gpr_a_data_start = FIELD_GET(ICE_MI_GADS, d64); + mi->gpr_a_data_len = FIELD_GET(ICE_MI_GADL, d64); + mi->gpr_a_id = FIELD_GET(ICE_MI_GAI, d64); + + mi->gpr_b_ctrl = FIELD_GET(ICE_MI_GBC, d64); + + d64 = *((u64 *)&buf[ICE_MI_GBDM_IDD]) >> ICE_MI_GBDM_OFF; + + mi->gpr_b_data_mdid = FIELD_GET(ICE_MI_GBDM, d64); + mi->gpr_b_data_start = FIELD_GET(ICE_MI_GBDS, d64); + mi->gpr_b_data_len = FIELD_GET(ICE_MI_GBDL, d64); + mi->gpr_b_id = FIELD_GET(ICE_MI_GBI, d64); + + mi->gpr_c_ctrl = FIELD_GET(ICE_MI_GCC, d64); + mi->gpr_c_data_mdid = FIELD_GET(ICE_MI_GCDM, d64); + mi->gpr_c_data_start = FIELD_GET(ICE_MI_GCDS, d64); + mi->gpr_c_data_len = FIELD_GET(ICE_MI_GCDL, d64); + mi->gpr_c_id = FIELD_GET(ICE_MI_GCI, d64); + + mi->gpr_d_ctrl = FIELD_GET(ICE_MI_GDC, d64); + mi->gpr_d_data_mdid = FIELD_GET(ICE_MI_GDDM, d64); + mi->gpr_d_data_start = FIELD_GET(ICE_MI_GDDS, d64); + mi->gpr_d_data_len = FIELD_GET(ICE_MI_GDDL, d64); + mi->gpr_d_id = FIELD_GET(ICE_MI_GDI, d64); + + d64 = *((u64 *)&buf[ICE_MI_FLAG_IDD]) >> ICE_MI_FLAG_OFF; + + mi->flags = FIELD_GET(ICE_MI_FLAG, d64); + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_metainit_dump(hw, mi); +} + +/** + * ice_metainit_table_get - create a metainit table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Metadata initialization table. 
+ */
+static struct ice_metainit_item *ice_metainit_table_get(struct ice_hw *hw)
+{
+ return ice_parser_create_table(hw, ICE_SID_RXPARSER_METADATA_INIT,
+ sizeof(struct ice_metainit_item),
+ ICE_METAINIT_TABLE_SIZE,
+ ice_metainit_parse_item, false);
+}
+
+/**
+ * ice_bst_tcam_search - find a TCAM item with a specific type
+ * @tcam_table: the TCAM table
+ * @lbl_table: the lbl table to search
+ * @type: the type we need to match against
+ * @start: start searching from this index
+ *
+ * Return: a pointer to the matching BOOST TCAM item or NULL.
+ */
+struct ice_bst_tcam_item *
+ice_bst_tcam_search(struct ice_bst_tcam_item *tcam_table,
+ struct ice_lbl_item *lbl_table,
+ enum ice_lbl_type type, u16 *start)
+{
+ u16 i = *start;
+
+ for (; i < ICE_BST_TCAM_TABLE_SIZE; i++) {
+ if (lbl_table[i].type == type) {
+ *start = i;
+ return &tcam_table[lbl_table[i].idx];
+ }
+ }
+
+ return NULL;
+}
+
+/*** ICE_SID_RXPARSER_CAM, ICE_SID_RXPARSER_PG_SPILL,
+ * ICE_SID_RXPARSER_NOMATCH_CAM and ICE_SID_RXPARSER_NOMATCH_SPILL
+ * sections ***/
+static void ice_pg_cam_key_dump(struct ice_hw *hw, struct ice_pg_cam_key *key)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "key:\n");
+ dev_info(dev, "\tvalid = %d\n", key->valid);
+ dev_info(dev, "\tnode_id = %d\n", key->node_id);
+ dev_info(dev, "\tflag0 = %d\n", key->flag0);
+ dev_info(dev, "\tflag1 = %d\n", key->flag1);
+ dev_info(dev, "\tflag2 = %d\n", key->flag2);
+ dev_info(dev, "\tflag3 = %d\n", key->flag3);
+ dev_info(dev, "\tboost_idx = %d\n", key->boost_idx);
+ dev_info(dev, "\talu_reg = 0x%04x\n", key->alu_reg);
+ dev_info(dev, "\tnext_proto = 0x%08x\n", key->next_proto);
+}
+
+static void ice_pg_nm_cam_key_dump(struct ice_hw *hw,
+ struct ice_pg_nm_cam_key *key)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "key:\n");
+ dev_info(dev, "\tvalid = %d\n", key->valid);
+ dev_info(dev, "\tnode_id = %d\n", key->node_id);
+ dev_info(dev, "\tflag0 = %d\n", key->flag0);
+ dev_info(dev, "\tflag1 = %d\n", key->flag1);
+ dev_info(dev, "\tflag2 = %d\n", key->flag2);
+ dev_info(dev, "\tflag3 = %d\n", key->flag3);
+ dev_info(dev, "\tboost_idx = %d\n", key->boost_idx);
+ dev_info(dev, "\talu_reg = 0x%04x\n", key->alu_reg);
+}
+
+static void ice_pg_cam_action_dump(struct ice_hw *hw,
+ struct ice_pg_cam_action *action)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "action:\n");
+ dev_info(dev, "\tnext_node = %d\n", action->next_node);
+ dev_info(dev, "\tnext_pc = %d\n", action->next_pc);
+ dev_info(dev, "\tis_pg = %d\n", action->is_pg);
+ dev_info(dev, "\tproto_id = %d\n", action->proto_id);
+ dev_info(dev, "\tis_mg = %d\n", action->is_mg);
+ dev_info(dev, "\tmarker_id = %d\n", action->marker_id);
+ dev_info(dev, "\tis_last_round = %d\n", action->is_last_round);
+ dev_info(dev, "\tho_polarity = %d\n", action->ho_polarity);
+ dev_info(dev, "\tho_inc = %d\n", action->ho_inc);
+}
+
+/**
+ * ice_pg_cam_dump - dump a parse graph cam info
+ * @hw: pointer to the hardware structure
+ * @item: parse graph cam to dump
+ */
+static void ice_pg_cam_dump(struct ice_hw *hw, struct ice_pg_cam_item *item)
+{
+ dev_info(ice_hw_to_dev(hw), "index = %d\n", item->idx);
+ ice_pg_cam_key_dump(hw, &item->key);
+ ice_pg_cam_action_dump(hw, &item->action);
+}
+
+/**
+ * ice_pg_nm_cam_dump - dump a parse graph no match cam info
+ * @hw: pointer to the hardware structure
+ * @item: parse graph no match cam to dump
+ */
+static void ice_pg_nm_cam_dump(struct ice_hw *hw,
+ struct ice_pg_nm_cam_item *item)
+{
+ dev_info(ice_hw_to_dev(hw), "index = 
%d\n", item->idx); + ice_pg_nm_cam_key_dump(hw, &item->key); + ice_pg_cam_action_dump(hw, &item->action); +} + +#define ICE_PGCA_NN GENMASK_ULL(10, 0) +#define ICE_PGCA_NPC GENMASK_ULL(18, 11) +#define ICE_PGCA_IPG BIT_ULL(19) +#define ICE_PGCA_PID GENMASK_ULL(30, 23) +#define ICE_PGCA_IMG BIT_ULL(31) +#define ICE_PGCA_MID GENMASK_ULL(39, 32) +#define ICE_PGCA_ILR BIT_ULL(40) +#define ICE_PGCA_HOP BIT_ULL(41) +#define ICE_PGCA_HOI GENMASK_ULL(50, 42) + +/** + * ice_pg_cam_action_init - parse 55 bits of Parse Graph CAM Action + * @action: pointer to the Parse Graph CAM Action structure + * @data: Parse Graph CAM Action data to be parsed + */ +static void ice_pg_cam_action_init(struct ice_pg_cam_action *action, u64 data) +{ + action->next_node = FIELD_GET(ICE_PGCA_NN, data); + action->next_pc = FIELD_GET(ICE_PGCA_NPC, data); + action->is_pg = FIELD_GET(ICE_PGCA_IPG, data); + action->proto_id = FIELD_GET(ICE_PGCA_PID, data); + action->is_mg = FIELD_GET(ICE_PGCA_IMG, data); + action->marker_id = FIELD_GET(ICE_PGCA_MID, data); + action->is_last_round = FIELD_GET(ICE_PGCA_ILR, data); + action->ho_polarity = FIELD_GET(ICE_PGCA_HOP, data); + action->ho_inc = FIELD_GET(ICE_PGCA_HOI, data); +} + +#define ICE_PGNCK_VLD BIT_ULL(0) +#define ICE_PGNCK_NID GENMASK_ULL(11, 1) +#define ICE_PGNCK_F0 BIT_ULL(12) +#define ICE_PGNCK_F1 BIT_ULL(13) +#define ICE_PGNCK_F2 BIT_ULL(14) +#define ICE_PGNCK_F3 BIT_ULL(15) +#define ICE_PGNCK_BH BIT_ULL(16) +#define ICE_PGNCK_BI GENMASK_ULL(24, 17) +#define ICE_PGNCK_AR GENMASK_ULL(40, 25) + +/** + * ice_pg_nm_cam_key_init - parse 41 bits of Parse Graph NoMatch CAM Key + * @key: pointer to the Parse Graph NoMatch CAM Key structure + * @data: Parse Graph NoMatch CAM Key data to be parsed + */ +static void ice_pg_nm_cam_key_init(struct ice_pg_nm_cam_key *key, u64 data) +{ + key->valid = FIELD_GET(ICE_PGNCK_VLD, data); + key->node_id = FIELD_GET(ICE_PGNCK_NID, data); + key->flag0 = FIELD_GET(ICE_PGNCK_F0, data); + key->flag1 = FIELD_GET(ICE_PGNCK_F1, data); + key->flag2 = FIELD_GET(ICE_PGNCK_F2, data); + key->flag3 = FIELD_GET(ICE_PGNCK_F3, data); + + if (FIELD_GET(ICE_PGNCK_BH, data)) + key->boost_idx = FIELD_GET(ICE_PGNCK_BI, data); + else + key->boost_idx = 0; + + key->alu_reg = FIELD_GET(ICE_PGNCK_AR, data); +} + +#define ICE_PGCK_VLD BIT_ULL(0) +#define ICE_PGCK_NID GENMASK_ULL(11, 1) +#define ICE_PGCK_F0 BIT_ULL(12) +#define ICE_PGCK_F1 BIT_ULL(13) +#define ICE_PGCK_F2 BIT_ULL(14) +#define ICE_PGCK_F3 BIT_ULL(15) +#define ICE_PGCK_BH BIT_ULL(16) +#define ICE_PGCK_BI GENMASK_ULL(24, 17) +#define ICE_PGCK_AR GENMASK_ULL(40, 25) +#define ICE_PGCK_NPK_S 41 /* offset for the 2nd 64-bits field */ +#define ICE_PGCK_NPK_IDD (ICE_PGCK_NPK_S / BITS_PER_BYTE) +#define ICE_PGCK_NPK_OFF (ICE_PGCK_NPK_S % BITS_PER_BYTE) +#define ICE_PGCK_NPK GENMASK_ULL(72 - ICE_PGCK_NPK_S, \ + 41 - ICE_PGCK_NPK_S) + +/** + * ice_pg_cam_key_init - parse 73 bits of Parse Graph CAM Key + * @key: pointer to the Parse Graph CAM Key structure + * @data: Parse Graph CAM Key data to be parsed + */ +static void ice_pg_cam_key_init(struct ice_pg_cam_key *key, u8 *data) +{ + u64 d64 = *(u64 *)data; + + key->valid = FIELD_GET(ICE_PGCK_VLD, d64); + key->node_id = FIELD_GET(ICE_PGCK_NID, d64); + key->flag0 = FIELD_GET(ICE_PGCK_F0, d64); + key->flag1 = FIELD_GET(ICE_PGCK_F1, d64); + key->flag2 = FIELD_GET(ICE_PGCK_F2, d64); + key->flag3 = FIELD_GET(ICE_PGCK_F3, d64); + + if (FIELD_GET(ICE_PGCK_BH, d64)) + key->boost_idx = FIELD_GET(ICE_PGCK_BI, d64); + else + key->boost_idx = 0; + + key->alu_reg = 
FIELD_GET(ICE_PGCK_AR, d64); + + d64 = *((u64 *)&data[ICE_PGCK_NPK_IDD]) >> ICE_PGCK_NPK_OFF; + + key->next_proto = FIELD_GET(ICE_PGCK_NPK, d64); +} + +#define ICE_PG_CAM_ACT_S 73 +#define ICE_PG_CAM_ACT_IDD (ICE_PG_CAM_ACT_S / BITS_PER_BYTE) +#define ICE_PG_CAM_ACT_OFF (ICE_PG_CAM_ACT_S % BITS_PER_BYTE) + +/** + * ice_pg_cam_parse_item - parse 128 bits of Parse Graph CAM Entry + * @hw: pointer to the hardware structure + * @idx: index of Parse Graph CAM Entry + * @item: item of Parse Graph CAM Entry + * @data: Parse Graph CAM Entry data to be parsed + * @size: size of Parse Graph CAM Entry + */ +static void ice_pg_cam_parse_item(struct ice_hw *hw, u16 idx, void *item, + void *data, int __maybe_unused size) +{ + struct ice_pg_cam_item *ci = item; + u8 *buf = data; + u64 d64; + + ci->idx = idx; + + ice_pg_cam_key_init(&ci->key, buf); + + d64 = *((u64 *)&buf[ICE_PG_CAM_ACT_IDD]) >> ICE_PG_CAM_ACT_OFF; + ice_pg_cam_action_init(&ci->action, d64); + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_pg_cam_dump(hw, ci); +} + +#define ICE_PG_SP_CAM_KEY_S 56 +#define ICE_PG_SP_CAM_KEY_IDD (ICE_PG_SP_CAM_KEY_S / BITS_PER_BYTE) + +/** + * ice_pg_sp_cam_parse_item - parse 136 bits of Parse Graph Spill CAM Entry + * @hw: pointer to the hardware structure + * @idx: index of Parse Graph Spill CAM Entry + * @item: item of Parse Graph Spill CAM Entry + * @data: Parse Graph Spill CAM Entry data to be parsed + * @size: size of Parse Graph Spill CAM Entry + */ +static void ice_pg_sp_cam_parse_item(struct ice_hw *hw, u16 idx, void *item, + void *data, int __maybe_unused size) +{ + struct ice_pg_cam_item *ci = item; + u8 *buf = data; + u64 d64; + + ci->idx = idx; + + d64 = *(u64 *)buf; + ice_pg_cam_action_init(&ci->action, d64); + + ice_pg_cam_key_init(&ci->key, &buf[ICE_PG_SP_CAM_KEY_IDD]); + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_pg_cam_dump(hw, ci); +} + +#define ICE_PG_NM_CAM_ACT_S 41 +#define ICE_PG_NM_CAM_ACT_IDD (ICE_PG_NM_CAM_ACT_S / BITS_PER_BYTE) +#define ICE_PG_NM_CAM_ACT_OFF (ICE_PG_NM_CAM_ACT_S % BITS_PER_BYTE) + +/** + * ice_pg_nm_cam_parse_item - parse 96 bits of Parse Graph NoMatch CAM Entry + * @hw: pointer to the hardware structure + * @idx: index of Parse Graph NoMatch CAM Entry + * @item: item of Parse Graph NoMatch CAM Entry + * @data: Parse Graph NoMatch CAM Entry data to be parsed + * @size: size of Parse Graph NoMatch CAM Entry + */ +static void ice_pg_nm_cam_parse_item(struct ice_hw *hw, u16 idx, void *item, + void *data, int __maybe_unused size) +{ + struct ice_pg_nm_cam_item *ci = item; + u8 *buf = data; + u64 d64; + + ci->idx = idx; + + d64 = *(u64 *)buf; + ice_pg_nm_cam_key_init(&ci->key, d64); + + d64 = *((u64 *)&buf[ICE_PG_NM_CAM_ACT_IDD]) >> ICE_PG_NM_CAM_ACT_OFF; + ice_pg_cam_action_init(&ci->action, d64); + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_pg_nm_cam_dump(hw, ci); +} + +#define ICE_PG_NM_SP_CAM_ACT_S 56 +#define ICE_PG_NM_SP_CAM_ACT_IDD (ICE_PG_NM_SP_CAM_ACT_S / BITS_PER_BYTE) +#define ICE_PG_NM_SP_CAM_ACT_OFF (ICE_PG_NM_SP_CAM_ACT_S % BITS_PER_BYTE) + +/** + * ice_pg_nm_sp_cam_parse_item - parse 104 bits of Parse Graph NoMatch Spill + * CAM Entry + * @hw: pointer to the hardware structure + * @idx: index of Parse Graph NoMatch Spill CAM Entry + * @item: item of Parse Graph NoMatch Spill CAM Entry + * @data: Parse Graph NoMatch Spill CAM Entry data to be parsed + * @size: size of Parse Graph NoMatch Spill CAM Entry + */ +static void ice_pg_nm_sp_cam_parse_item(struct ice_hw *hw, u16 idx, + void *item, void *data, + int __maybe_unused size) +{ + struct 
ice_pg_nm_cam_item *ci = item; + u8 *buf = data; + u64 d64; + + ci->idx = idx; + + d64 = *(u64 *)buf; + ice_pg_cam_action_init(&ci->action, d64); + + d64 = *((u64 *)&buf[ICE_PG_NM_SP_CAM_ACT_IDD]) >> + ICE_PG_NM_SP_CAM_ACT_OFF; + ice_pg_nm_cam_key_init(&ci->key, d64); + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_pg_nm_cam_dump(hw, ci); +} + +/** + * ice_pg_cam_table_get - create a parse graph cam table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Parse Graph CAM table. + */ +static struct ice_pg_cam_item *ice_pg_cam_table_get(struct ice_hw *hw) +{ + return ice_parser_create_table(hw, ICE_SID_RXPARSER_CAM, + sizeof(struct ice_pg_cam_item), + ICE_PG_CAM_TABLE_SIZE, + ice_pg_cam_parse_item, false); +} + +/** + * ice_pg_sp_cam_table_get - create a parse graph spill cam table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Parse Graph Spill CAM table. + */ +static struct ice_pg_cam_item *ice_pg_sp_cam_table_get(struct ice_hw *hw) +{ + return ice_parser_create_table(hw, ICE_SID_RXPARSER_PG_SPILL, + sizeof(struct ice_pg_cam_item), + ICE_PG_SP_CAM_TABLE_SIZE, + ice_pg_sp_cam_parse_item, false); +} + +/** + * ice_pg_nm_cam_table_get - create a parse graph no match cam table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Parse Graph No Match CAM table. + */ +static struct ice_pg_nm_cam_item *ice_pg_nm_cam_table_get(struct ice_hw *hw) +{ + return ice_parser_create_table(hw, ICE_SID_RXPARSER_NOMATCH_CAM, + sizeof(struct ice_pg_nm_cam_item), + ICE_PG_NM_CAM_TABLE_SIZE, + ice_pg_nm_cam_parse_item, false); +} + +/** + * ice_pg_nm_sp_cam_table_get - create a parse graph no match spill cam table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Parse Graph No Match Spill CAM table. + */ +static struct ice_pg_nm_cam_item *ice_pg_nm_sp_cam_table_get(struct ice_hw *hw) +{ + return ice_parser_create_table(hw, ICE_SID_RXPARSER_NOMATCH_SPILL, + sizeof(struct ice_pg_nm_cam_item), + ICE_PG_NM_SP_CAM_TABLE_SIZE, + ice_pg_nm_sp_cam_parse_item, false); +} + +static bool __ice_pg_cam_match(struct ice_pg_cam_item *item, + struct ice_pg_cam_key *key) +{ + return (item->key.valid && + !memcmp(&item->key.val, &key->val, sizeof(key->val))); +} + +static bool __ice_pg_nm_cam_match(struct ice_pg_nm_cam_item *item, + struct ice_pg_cam_key *key) +{ + return (item->key.valid && + !memcmp(&item->key.val, &key->val, sizeof(item->key.val))); +} + +/** + * ice_pg_cam_match - search parse graph cam table by key + * @table: parse graph cam table to search + * @size: cam table size + * @key: search key + * + * Return: a pointer to the matching PG CAM item or NULL. + */ +struct ice_pg_cam_item *ice_pg_cam_match(struct ice_pg_cam_item *table, + int size, struct ice_pg_cam_key *key) +{ + int i; + + for (i = 0; i < size; i++) { + struct ice_pg_cam_item *item = &table[i]; + + if (__ice_pg_cam_match(item, key)) + return item; + } + + return NULL; +} + +/** + * ice_pg_nm_cam_match - search parse graph no match cam table by key + * @table: parse graph no match cam table to search + * @size: cam table size + * @key: search key + * + * Return: a pointer to the matching PG No Match CAM item or NULL. 
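+ *
+ * A minimal lookup sketch (the key values are illustrative only; real
+ * keys are built by the parser runtime):
+ *
+ *    struct ice_pg_cam_key key = { .valid = 1, .node_id = 2 };
+ *    struct ice_pg_nm_cam_item *item;
+ *
+ *    item = ice_pg_nm_cam_match(table, ICE_PG_NM_CAM_TABLE_SIZE, &key);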
+ */
+struct ice_pg_nm_cam_item *
+ice_pg_nm_cam_match(struct ice_pg_nm_cam_item *table, int size,
+ struct ice_pg_cam_key *key)
+{
+ int i;
+
+ for (i = 0; i < size; i++) {
+ struct ice_pg_nm_cam_item *item = &table[i];
+
+ if (__ice_pg_nm_cam_match(item, key))
+ return item;
+ }
+
+ return NULL;
+}
+
+/*** Ternary match ***/
+/* Perform a ternary match on a 1-byte pattern (@pat) given @key and @key_inv
+ * Rules (per bit):
+ * Key == 0 and Key_inv == 0 : Never match
+ * Key == 0 and Key_inv == 1 : Match on bit == 1
+ * Key == 1 and Key_inv == 0 : Match on bit == 0
+ * Key == 1 and Key_inv == 1 : Always match (Don't care)
+ *
+ * Return: true if all bits match, false otherwise.
+ */
+static bool ice_ternary_match_byte(u8 key, u8 key_inv, u8 pat)
+{
+ u8 bit_key, bit_key_inv, bit_pat;
+ int i;
+
+ for (i = 0; i < BITS_PER_BYTE; i++) {
+ bit_key = key & BIT(i);
+ bit_key_inv = key_inv & BIT(i);
+ bit_pat = pat & BIT(i);
+
+ if (bit_key != 0 && bit_key_inv != 0)
+ continue;
+
+ if ((bit_key == 0 && bit_key_inv == 0) || bit_key == bit_pat)
+ return false;
+ }
+
+ return true;
+}
+
+static bool ice_ternary_match(const u8 *key, const u8 *key_inv,
+ const u8 *pat, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ if (!ice_ternary_match_byte(key[i], key_inv[i], pat[i]))
+ return false;
+
+ return true;
+}
+
+/*** ICE_SID_RXPARSER_BOOST_TCAM and ICE_SID_LBL_RXPARSER_TMEM sections ***/
+static void ice_bst_np_kb_dump(struct ice_hw *hw, struct ice_np_keybuilder *kb)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "next proto key builder:\n");
+ dev_info(dev, "\topc = %d\n", kb->opc);
+ dev_info(dev, "\tstart_reg0 = %d\n", kb->start_reg0);
+ dev_info(dev, "\tlen_reg1 = %d\n", kb->len_reg1);
+}
+
+static void ice_bst_pg_kb_dump(struct ice_hw *hw, struct ice_pg_keybuilder *kb)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "parse graph key builder:\n");
+ dev_info(dev, "\tflag0_ena = %d\n", kb->flag0_ena);
+ dev_info(dev, "\tflag1_ena = %d\n", kb->flag1_ena);
+ dev_info(dev, "\tflag2_ena = %d\n", kb->flag2_ena);
+ dev_info(dev, "\tflag3_ena = %d\n", kb->flag3_ena);
+ dev_info(dev, "\tflag0_idx = %d\n", kb->flag0_idx);
+ dev_info(dev, "\tflag1_idx = %d\n", kb->flag1_idx);
+ dev_info(dev, "\tflag2_idx = %d\n", kb->flag2_idx);
+ dev_info(dev, "\tflag3_idx = %d\n", kb->flag3_idx);
+ dev_info(dev, "\talu_reg_idx = %d\n", kb->alu_reg_idx);
+}
+
+static void ice_bst_alu_dump(struct ice_hw *hw, struct ice_alu *alu, int idx)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "alu%d:\n", idx);
+ dev_info(dev, "\topc = %d\n", alu->opc);
+ dev_info(dev, "\tsrc_start = %d\n", alu->src_start);
+ dev_info(dev, "\tsrc_len = %d\n", alu->src_len);
+ dev_info(dev, "\tshift_xlate_sel = %d\n", alu->shift_xlate_sel);
+ dev_info(dev, "\tshift_xlate_key = %d\n", alu->shift_xlate_key);
+ dev_info(dev, "\tsrc_reg_id = %d\n", alu->src_reg_id);
+ dev_info(dev, "\tdst_reg_id = %d\n", alu->dst_reg_id);
+ dev_info(dev, "\tinc0 = %d\n", alu->inc0);
+ dev_info(dev, "\tinc1 = %d\n", alu->inc1);
+ dev_info(dev, "\tproto_offset_opc = %d\n", alu->proto_offset_opc);
+ dev_info(dev, "\tproto_offset = %d\n", alu->proto_offset);
+ dev_info(dev, "\tbranch_addr = %d\n", alu->branch_addr);
+ dev_info(dev, "\timm = %d\n", alu->imm);
+ dev_info(dev, "\tdst_start = %d\n", alu->dst_start);
+ dev_info(dev, "\tdst_len = %d\n", alu->dst_len);
+ dev_info(dev, "\tflags_extr_imm = %d\n", alu->flags_extr_imm);
+ dev_info(dev, "\tflags_start_imm = %d\n", alu->flags_start_imm);
+}
+
+/**
+ * 
ice_bst_tcam_dump - dump a boost tcam info + * @hw: pointer to the hardware structure + * @item: boost tcam to dump + */ +static void ice_bst_tcam_dump(struct ice_hw *hw, struct ice_bst_tcam_item *item) +{ + struct device *dev = ice_hw_to_dev(hw); + int i; + + dev_info(dev, "addr = %d\n", item->addr); + + dev_info(dev, "key : "); + for (i = 0; i < ICE_BST_TCAM_KEY_SIZE; i++) + dev_info(dev, "%02x ", item->key[i]); + + dev_info(dev, "\n"); + + dev_info(dev, "key_inv: "); + for (i = 0; i < ICE_BST_TCAM_KEY_SIZE; i++) + dev_info(dev, "%02x ", item->key_inv[i]); + + dev_info(dev, "\n"); + + dev_info(dev, "hit_idx_grp = %d\n", item->hit_idx_grp); + dev_info(dev, "pg_prio = %d\n", item->pg_prio); + + ice_bst_np_kb_dump(hw, &item->np_kb); + ice_bst_pg_kb_dump(hw, &item->pg_kb); + + ice_bst_alu_dump(hw, &item->alu0, ICE_ALU0_IDX); + ice_bst_alu_dump(hw, &item->alu1, ICE_ALU1_IDX); + ice_bst_alu_dump(hw, &item->alu2, ICE_ALU2_IDX); +} + +static void ice_lbl_dump(struct ice_hw *hw, struct ice_lbl_item *item) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "index = %u\n", item->idx); + dev_info(dev, "type = %u\n", item->type); + dev_info(dev, "label = %s\n", item->label); +} + +#define ICE_BST_ALU_OPC GENMASK_ULL(5, 0) +#define ICE_BST_ALU_SS GENMASK_ULL(13, 6) +#define ICE_BST_ALU_SL GENMASK_ULL(18, 14) +#define ICE_BST_ALU_SXS BIT_ULL(19) +#define ICE_BST_ALU_SXK GENMASK_ULL(23, 20) +#define ICE_BST_ALU_SRID GENMASK_ULL(30, 24) +#define ICE_BST_ALU_DRID GENMASK_ULL(37, 31) +#define ICE_BST_ALU_INC0 BIT_ULL(38) +#define ICE_BST_ALU_INC1 BIT_ULL(39) +#define ICE_BST_ALU_POO GENMASK_ULL(41, 40) +#define ICE_BST_ALU_PO GENMASK_ULL(49, 42) +#define ICE_BST_ALU_BA_S 50 /* offset for the 2nd 64-bits field */ +#define ICE_BST_ALU_BA GENMASK_ULL(57 - ICE_BST_ALU_BA_S, \ + 50 - ICE_BST_ALU_BA_S) +#define ICE_BST_ALU_IMM GENMASK_ULL(73 - ICE_BST_ALU_BA_S, \ + 58 - ICE_BST_ALU_BA_S) +#define ICE_BST_ALU_DFE BIT_ULL(74 - ICE_BST_ALU_BA_S) +#define ICE_BST_ALU_DS GENMASK_ULL(80 - ICE_BST_ALU_BA_S, \ + 75 - ICE_BST_ALU_BA_S) +#define ICE_BST_ALU_DL GENMASK_ULL(86 - ICE_BST_ALU_BA_S, \ + 81 - ICE_BST_ALU_BA_S) +#define ICE_BST_ALU_FEI BIT_ULL(87 - ICE_BST_ALU_BA_S) +#define ICE_BST_ALU_FSI GENMASK_ULL(95 - ICE_BST_ALU_BA_S, \ + 88 - ICE_BST_ALU_BA_S) + +/** + * ice_bst_alu_init - parse 96 bits of ALU entry + * @alu: pointer to the ALU entry structure + * @data: ALU entry data to be parsed + * @off: offset of the ALU entry data + */ +static void ice_bst_alu_init(struct ice_alu *alu, u8 *data, u8 off) +{ + u64 d64; + u8 idd; + + d64 = *((u64 *)data) >> off; + + alu->opc = FIELD_GET(ICE_BST_ALU_OPC, d64); + alu->src_start = FIELD_GET(ICE_BST_ALU_SS, d64); + alu->src_len = FIELD_GET(ICE_BST_ALU_SL, d64); + alu->shift_xlate_sel = FIELD_GET(ICE_BST_ALU_SXS, d64); + alu->shift_xlate_key = FIELD_GET(ICE_BST_ALU_SXK, d64); + alu->src_reg_id = FIELD_GET(ICE_BST_ALU_SRID, d64); + alu->dst_reg_id = FIELD_GET(ICE_BST_ALU_DRID, d64); + alu->inc0 = FIELD_GET(ICE_BST_ALU_INC0, d64); + alu->inc1 = FIELD_GET(ICE_BST_ALU_INC1, d64); + alu->proto_offset_opc = FIELD_GET(ICE_BST_ALU_POO, d64); + alu->proto_offset = FIELD_GET(ICE_BST_ALU_PO, d64); + + idd = (ICE_BST_ALU_BA_S + off) / BITS_PER_BYTE; + off = (ICE_BST_ALU_BA_S + off) % BITS_PER_BYTE; + d64 = *((u64 *)(&data[idd])) >> off; + + alu->branch_addr = FIELD_GET(ICE_BST_ALU_BA, d64); + alu->imm = FIELD_GET(ICE_BST_ALU_IMM, d64); + alu->dedicate_flags_ena = FIELD_GET(ICE_BST_ALU_DFE, d64); + alu->dst_start = FIELD_GET(ICE_BST_ALU_DS, d64); + alu->dst_len = 
FIELD_GET(ICE_BST_ALU_DL, d64); + alu->flags_extr_imm = FIELD_GET(ICE_BST_ALU_FEI, d64); + alu->flags_start_imm = FIELD_GET(ICE_BST_ALU_FSI, d64); +} + +#define ICE_BST_PGKB_F0_ENA BIT_ULL(0) +#define ICE_BST_PGKB_F0_IDX GENMASK_ULL(6, 1) +#define ICE_BST_PGKB_F1_ENA BIT_ULL(7) +#define ICE_BST_PGKB_F1_IDX GENMASK_ULL(13, 8) +#define ICE_BST_PGKB_F2_ENA BIT_ULL(14) +#define ICE_BST_PGKB_F2_IDX GENMASK_ULL(20, 15) +#define ICE_BST_PGKB_F3_ENA BIT_ULL(21) +#define ICE_BST_PGKB_F3_IDX GENMASK_ULL(27, 22) +#define ICE_BST_PGKB_AR_IDX GENMASK_ULL(34, 28) + +/** + * ice_bst_pgkb_init - parse 35 bits of Parse Graph Key Build + * @kb: pointer to the Parse Graph Key Build structure + * @data: Parse Graph Key Build data to be parsed + */ +static void ice_bst_pgkb_init(struct ice_pg_keybuilder *kb, u64 data) +{ + kb->flag0_ena = FIELD_GET(ICE_BST_PGKB_F0_ENA, data); + kb->flag0_idx = FIELD_GET(ICE_BST_PGKB_F0_IDX, data); + kb->flag1_ena = FIELD_GET(ICE_BST_PGKB_F1_ENA, data); + kb->flag1_idx = FIELD_GET(ICE_BST_PGKB_F1_IDX, data); + kb->flag2_ena = FIELD_GET(ICE_BST_PGKB_F2_ENA, data); + kb->flag2_idx = FIELD_GET(ICE_BST_PGKB_F2_IDX, data); + kb->flag3_ena = FIELD_GET(ICE_BST_PGKB_F3_ENA, data); + kb->flag3_idx = FIELD_GET(ICE_BST_PGKB_F3_IDX, data); + kb->alu_reg_idx = FIELD_GET(ICE_BST_PGKB_AR_IDX, data); +} + +#define ICE_BST_NPKB_OPC GENMASK(1, 0) +#define ICE_BST_NPKB_S_R0 GENMASK(9, 2) +#define ICE_BST_NPKB_L_R1 GENMASK(17, 10) + +/** + * ice_bst_npkb_init - parse 18 bits of Next Protocol Key Build + * @kb: pointer to the Next Protocol Key Build structure + * @data: Next Protocol Key Build data to be parsed + */ +static void ice_bst_npkb_init(struct ice_np_keybuilder *kb, u32 data) +{ + kb->opc = FIELD_GET(ICE_BST_NPKB_OPC, data); + kb->start_reg0 = FIELD_GET(ICE_BST_NPKB_S_R0, data); + kb->len_reg1 = FIELD_GET(ICE_BST_NPKB_L_R1, data); +} + +#define ICE_BT_KEY_S 32 +#define ICE_BT_KEY_IDD (ICE_BT_KEY_S / BITS_PER_BYTE) +#define ICE_BT_KIV_S 192 +#define ICE_BT_KIV_IDD (ICE_BT_KIV_S / BITS_PER_BYTE) +#define ICE_BT_HIG_S 352 +#define ICE_BT_HIG_IDD (ICE_BT_HIG_S / BITS_PER_BYTE) +#define ICE_BT_PGP_S 360 +#define ICE_BT_PGP_IDD (ICE_BT_PGP_S / BITS_PER_BYTE) +#define ICE_BT_PGP_M GENMASK(361 - ICE_BT_PGP_S, 360 - ICE_BT_PGP_S) +#define ICE_BT_NPKB_S 362 +#define ICE_BT_NPKB_IDD (ICE_BT_NPKB_S / BITS_PER_BYTE) +#define ICE_BT_NPKB_OFF (ICE_BT_NPKB_S % BITS_PER_BYTE) +#define ICE_BT_PGKB_S 380 +#define ICE_BT_PGKB_IDD (ICE_BT_PGKB_S / BITS_PER_BYTE) +#define ICE_BT_PGKB_OFF (ICE_BT_PGKB_S % BITS_PER_BYTE) +#define ICE_BT_ALU0_S 415 +#define ICE_BT_ALU0_IDD (ICE_BT_ALU0_S / BITS_PER_BYTE) +#define ICE_BT_ALU0_OFF (ICE_BT_ALU0_S % BITS_PER_BYTE) +#define ICE_BT_ALU1_S 511 +#define ICE_BT_ALU1_IDD (ICE_BT_ALU1_S / BITS_PER_BYTE) +#define ICE_BT_ALU1_OFF (ICE_BT_ALU1_S % BITS_PER_BYTE) +#define ICE_BT_ALU2_S 607 +#define ICE_BT_ALU2_IDD (ICE_BT_ALU2_S / BITS_PER_BYTE) +#define ICE_BT_ALU2_OFF (ICE_BT_ALU2_S % BITS_PER_BYTE) + +/** + * ice_bst_parse_item - parse 704 bits of Boost TCAM entry + * @hw: pointer to the hardware structure + * @idx: index of Boost TCAM entry + * @item: item of Boost TCAM entry + * @data: Boost TCAM entry data to be parsed + * @size: size of Boost TCAM entry + */ +static void ice_bst_parse_item(struct ice_hw *hw, u16 idx, void *item, + void *data, int __maybe_unused size) +{ + struct ice_bst_tcam_item *ti = item; + u8 *buf = (u8 *)data; + int i; + + ti->addr = *(u16 *)buf; + + for (i = 0; i < ICE_BST_TCAM_KEY_SIZE; i++) { + ti->key[i] = buf[ICE_BT_KEY_IDD + i]; + 
ti->key_inv[i] = buf[ICE_BT_KIV_IDD + i]; + } + ti->hit_idx_grp = buf[ICE_BT_HIG_IDD]; + ti->pg_prio = buf[ICE_BT_PGP_IDD] & ICE_BT_PGP_M; + + ice_bst_npkb_init(&ti->np_kb, + *((u32 *)(&buf[ICE_BT_NPKB_IDD])) >> + ICE_BT_NPKB_OFF); + ice_bst_pgkb_init(&ti->pg_kb, + *((u64 *)(&buf[ICE_BT_PGKB_IDD])) >> + ICE_BT_PGKB_OFF); + + ice_bst_alu_init(&ti->alu0, &buf[ICE_BT_ALU0_IDD], ICE_BT_ALU0_OFF); + ice_bst_alu_init(&ti->alu1, &buf[ICE_BT_ALU1_IDD], ICE_BT_ALU1_OFF); + ice_bst_alu_init(&ti->alu2, &buf[ICE_BT_ALU2_IDD], ICE_BT_ALU2_OFF); + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_bst_tcam_dump(hw, ti); +} + +/** + * ice_bst_tcam_table_get - create a boost tcam table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Boost TCAM table. + */ +static struct ice_bst_tcam_item *ice_bst_tcam_table_get(struct ice_hw *hw) +{ + return ice_parser_create_table(hw, ICE_SID_RXPARSER_BOOST_TCAM, + sizeof(struct ice_bst_tcam_item), + ICE_BST_TCAM_TABLE_SIZE, + ice_bst_parse_item, true); +} + +static void ice_parse_lbl_item(struct ice_hw *hw, u16 idx, void *item, + void *data, int __maybe_unused size) +{ + struct ice_lbl_item *lbl_item = item; + struct ice_lbl_item *lbl_data = data; + + lbl_item->idx = lbl_data->idx; + memcpy(lbl_item->label, lbl_data->label, sizeof(lbl_item->label)); + + if (strstarts(lbl_item->label, ICE_LBL_BST_DVM)) + lbl_item->type = ICE_LBL_BST_TYPE_DVM; + else if (strstarts(lbl_item->label, ICE_LBL_BST_SVM)) + lbl_item->type = ICE_LBL_BST_TYPE_SVM; + else if (strstarts(lbl_item->label, ICE_LBL_TNL_VXLAN)) + lbl_item->type = ICE_LBL_BST_TYPE_VXLAN; + else if (strstarts(lbl_item->label, ICE_LBL_TNL_GENEVE)) + lbl_item->type = ICE_LBL_BST_TYPE_GENEVE; + else if (strstarts(lbl_item->label, ICE_LBL_TNL_UDP_ECPRI)) + lbl_item->type = ICE_LBL_BST_TYPE_UDP_ECPRI; + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_lbl_dump(hw, lbl_item); +} + +/** + * ice_bst_lbl_table_get - create a boost label table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Boost label table. + */ +static struct ice_lbl_item *ice_bst_lbl_table_get(struct ice_hw *hw) +{ + return ice_parser_create_table(hw, ICE_SID_LBL_RXPARSER_TMEM, + sizeof(struct ice_lbl_item), + ICE_BST_TCAM_TABLE_SIZE, + ice_parse_lbl_item, true); +} + +/** + * ice_bst_tcam_match - match a pattern on the boost tcam table + * @tcam_table: boost tcam table to search + * @pat: pattern to match + * + * Return: a pointer to the matching Boost TCAM item or NULL. 
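+ *
+ * The match is ternary, per ice_ternary_match_byte(). Example with
+ * illustrative values: a key byte of 0xFE and a key_inv byte of 0xFF
+ * make bit 0 "match on 1" and bits 1-7 "always match", so any pattern
+ * byte with bit 0 set matches.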
+ */
+struct ice_bst_tcam_item *
+ice_bst_tcam_match(struct ice_bst_tcam_item *tcam_table, u8 *pat)
+{
+ int i;
+
+ for (i = 0; i < ICE_BST_TCAM_TABLE_SIZE; i++) {
+ struct ice_bst_tcam_item *item = &tcam_table[i];
+
+ if (item->hit_idx_grp == 0)
+ continue;
+ if (ice_ternary_match(item->key, item->key_inv, pat,
+ ICE_BST_TCAM_KEY_SIZE))
+ return item;
+ }
+
+ return NULL;
+}
+
+/*** ICE_SID_RXPARSER_MARKER_PTYPE section ***/
+/**
+ * ice_ptype_mk_tcam_dump - dump a ptype marker tcam info
+ * @hw: pointer to the hardware structure
+ * @item: ptype marker tcam to dump
+ */
+static void ice_ptype_mk_tcam_dump(struct ice_hw *hw,
+ struct ice_ptype_mk_tcam_item *item)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+ int i;
+
+ dev_info(dev, "address = %d\n", item->address);
+ dev_info(dev, "ptype = %d\n", item->ptype);
+
+ dev_info(dev, "key :");
+ for (i = 0; i < ICE_PTYPE_MK_TCAM_KEY_SIZE; i++)
+ dev_info(dev, "%02x ", item->key[i]);
+
+ dev_info(dev, "\n");
+
+ dev_info(dev, "key_inv:");
+ for (i = 0; i < ICE_PTYPE_MK_TCAM_KEY_SIZE; i++)
+ dev_info(dev, "%02x ", item->key_inv[i]);
+
+ dev_info(dev, "\n");
+}
+
+static void ice_parse_ptype_mk_tcam_item(struct ice_hw *hw, u16 idx,
+ void *item, void *data, int size)
+{
+ memcpy(item, data, size);
+
+ if (hw->debug_mask & ICE_DBG_PARSER)
+ ice_ptype_mk_tcam_dump(hw,
+ (struct ice_ptype_mk_tcam_item *)item);
+}
+
+/**
+ * ice_ptype_mk_tcam_table_get - create a ptype marker tcam table
+ * @hw: pointer to the hardware structure
+ *
+ * Return: a pointer to the allocated Marker PType TCAM table.
+ */
+static
+struct ice_ptype_mk_tcam_item *ice_ptype_mk_tcam_table_get(struct ice_hw *hw)
+{
+ return ice_parser_create_table(hw, ICE_SID_RXPARSER_MARKER_PTYPE,
+ sizeof(struct ice_ptype_mk_tcam_item),
+ ICE_PTYPE_MK_TCAM_TABLE_SIZE,
+ ice_parse_ptype_mk_tcam_item, true);
+}
+
+/**
+ * ice_ptype_mk_tcam_match - match a pattern on a ptype marker tcam table
+ * @table: ptype marker tcam table to search
+ * @pat: pattern to match
+ * @len: length of the pattern
+ *
+ * Return: a pointer to the matching Marker PType item or NULL.
+ */
+struct ice_ptype_mk_tcam_item *
+ice_ptype_mk_tcam_match(struct ice_ptype_mk_tcam_item *table,
+ u8 *pat, int len)
+{
+ int i;
+
+ for (i = 0; i < ICE_PTYPE_MK_TCAM_TABLE_SIZE; i++) {
+ struct ice_ptype_mk_tcam_item *item = &table[i];
+
+ if (ice_ternary_match(item->key, item->key_inv, pat, len))
+ return item;
+ }
+
+ return NULL;
+}
+
+/*** ICE_SID_RXPARSER_MARKER_GRP section ***/
+/**
+ * ice_mk_grp_dump - dump a marker group item info
+ * @hw: pointer to the hardware structure
+ * @item: marker group item to dump
+ */
+static void ice_mk_grp_dump(struct ice_hw *hw, struct ice_mk_grp_item *item)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+ int i;
+
+ dev_info(dev, "index = %d\n", item->idx);
+
+ dev_info(dev, "markers: ");
+ for (i = 0; i < ICE_MK_COUNT_PER_GRP; i++)
+ dev_info(dev, "%d ", item->markers[i]);
+
+ dev_info(dev, "\n");
+}
+
+static void ice_mk_grp_parse_item(struct ice_hw *hw, u16 idx, void *item,
+ void *data, int __maybe_unused size)
+{
+ struct ice_mk_grp_item *grp = item;
+ u8 *buf = data;
+ int i;
+
+ grp->idx = idx;
+
+ for (i = 0; i < ICE_MK_COUNT_PER_GRP; i++)
+ grp->markers[i] = buf[i];
+
+ if (hw->debug_mask & ICE_DBG_PARSER)
+ ice_mk_grp_dump(hw, grp);
+}
+
+/**
+ * ice_mk_grp_table_get - create a marker group table
+ * @hw: pointer to the hardware structure
+ *
+ * Return: a pointer to the allocated Marker Group ID table.
+ */ +static struct ice_mk_grp_item *ice_mk_grp_table_get(struct ice_hw *hw) +{ + return ice_parser_create_table(hw, ICE_SID_RXPARSER_MARKER_GRP, + sizeof(struct ice_mk_grp_item), + ICE_MK_GRP_TABLE_SIZE, + ice_mk_grp_parse_item, false); +} + +/*** ICE_SID_RXPARSER_PROTO_GRP section ***/ +static void ice_proto_off_dump(struct ice_hw *hw, + struct ice_proto_off *po, int idx) +{ + struct device *dev = ice_hw_to_dev(hw); + + dev_info(dev, "proto %d\n", idx); + dev_info(dev, "\tpolarity = %d\n", po->polarity); + dev_info(dev, "\tproto_id = %d\n", po->proto_id); + dev_info(dev, "\toffset = %d\n", po->offset); +} + +/** + * ice_proto_grp_dump - dump a proto group item info + * @hw: pointer to the hardware structure + * @item: proto group item to dump + */ +static void ice_proto_grp_dump(struct ice_hw *hw, + struct ice_proto_grp_item *item) +{ + int i; + + dev_info(ice_hw_to_dev(hw), "index = %d\n", item->idx); + + for (i = 0; i < ICE_PROTO_COUNT_PER_GRP; i++) + ice_proto_off_dump(hw, &item->po[i], i); +} + +#define ICE_PO_POL BIT(0) +#define ICE_PO_PID GENMASK(8, 1) +#define ICE_PO_OFF GENMASK(21, 12) + +/** + * ice_proto_off_parse - parse 22 bits of Protocol entry + * @po: pointer to the Protocol entry structure + * @data: Protocol entry data to be parsed + */ +static void ice_proto_off_parse(struct ice_proto_off *po, u32 data) +{ + po->polarity = FIELD_GET(ICE_PO_POL, data); + po->proto_id = FIELD_GET(ICE_PO_PID, data); + po->offset = FIELD_GET(ICE_PO_OFF, data); +} + +/** + * ice_proto_grp_parse_item - parse 192 bits of Protocol Group Table entry + * @hw: pointer to the hardware structure + * @idx: index of Protocol Group Table entry + * @item: item of Protocol Group Table entry + * @data: Protocol Group Table entry data to be parsed + * @size: size of Protocol Group Table entry + */ +static void ice_proto_grp_parse_item(struct ice_hw *hw, u16 idx, void *item, + void *data, int __maybe_unused size) +{ + struct ice_proto_grp_item *grp = item; + u8 *buf = (u8 *)data; + u8 idd, off; + u32 d32; + int i; + + grp->idx = idx; + + for (i = 0; i < ICE_PROTO_COUNT_PER_GRP; i++) { + idd = (ICE_PROTO_GRP_ITEM_SIZE * i) / BITS_PER_BYTE; + off = (ICE_PROTO_GRP_ITEM_SIZE * i) % BITS_PER_BYTE; + d32 = *((u32 *)&buf[idd]) >> off; + ice_proto_off_parse(&grp->po[i], d32); + } + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_proto_grp_dump(hw, grp); +} + +/** + * ice_proto_grp_table_get - create a proto group table + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Protocol Group table. 
+ */
+static struct ice_proto_grp_item *ice_proto_grp_table_get(struct ice_hw *hw)
+{
+ return ice_parser_create_table(hw, ICE_SID_RXPARSER_PROTO_GRP,
+ sizeof(struct ice_proto_grp_item),
+ ICE_PROTO_GRP_TABLE_SIZE,
+ ice_proto_grp_parse_item, false);
+}
+
+/*** ICE_SID_RXPARSER_FLAG_REDIR section ***/
+/**
+ * ice_flg_rd_dump - dump a flag redirect item info
+ * @hw: pointer to the hardware structure
+ * @item: flag redirect item to dump
+ */
+static void ice_flg_rd_dump(struct ice_hw *hw, struct ice_flg_rd_item *item)
+{
+ struct device *dev = ice_hw_to_dev(hw);
+
+ dev_info(dev, "index = %d\n", item->idx);
+ dev_info(dev, "expose = %d\n", item->expose);
+ dev_info(dev, "intr_flg_id = %d\n", item->intr_flg_id);
+}
+
+#define ICE_FRT_EXPO BIT(0)
+#define ICE_FRT_IFID GENMASK(6, 1)
+
+/**
+ * ice_flg_rd_parse_item - parse 8 bits of Flag Redirect Table entry
+ * @hw: pointer to the hardware structure
+ * @idx: index of Flag Redirect Table entry
+ * @item: item of Flag Redirect Table entry
+ * @data: Flag Redirect Table entry data to be parsed
+ * @size: size of Flag Redirect Table entry
+ */
+static void ice_flg_rd_parse_item(struct ice_hw *hw, u16 idx, void *item,
+ void *data, int __maybe_unused size)
+{
+ struct ice_flg_rd_item *rdi = item;
+ u8 d8 = *(u8 *)data;
+
+ rdi->idx = idx;
+ rdi->expose = FIELD_GET(ICE_FRT_EXPO, d8);
+ rdi->intr_flg_id = FIELD_GET(ICE_FRT_IFID, d8);
+
+ if (hw->debug_mask & ICE_DBG_PARSER)
+ ice_flg_rd_dump(hw, rdi);
+}
+
+/**
+ * ice_flg_rd_table_get - create a flag redirect table
+ * @hw: pointer to the hardware structure
+ *
+ * Return: a pointer to the allocated Flags Redirection table.
+ */
+static struct ice_flg_rd_item *ice_flg_rd_table_get(struct ice_hw *hw)
+{
+ return ice_parser_create_table(hw, ICE_SID_RXPARSER_FLAG_REDIR,
+ sizeof(struct ice_flg_rd_item),
+ ICE_FLG_RD_TABLE_SIZE,
+ ice_flg_rd_parse_item, false);
+}
+
+/**
+ * ice_flg_redirect - redirect a parser flag to a packet flag
+ * @table: flag redirect table
+ * @psr_flg: parser flag to redirect
+ *
+ * Return: the redirected packet flag, or 0 if @psr_flg is 0.
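+ *
+ * Example with illustrative values: if table[3].expose is set and
+ * table[3].intr_flg_id is 10, then a @psr_flg with bit 10 set yields
+ * a return value with bit 3 set.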
+ */ +u64 ice_flg_redirect(struct ice_flg_rd_item *table, u64 psr_flg) +{ + u64 flg = 0; + int i; + + for (i = 0; i < ICE_FLG_RDT_SIZE; i++) { + struct ice_flg_rd_item *item = &table[i]; + + if (!item->expose) + continue; + + if (psr_flg & BIT(item->intr_flg_id)) + flg |= BIT(i); + } + + return flg; +} + +/*** ICE_SID_XLT_KEY_BUILDER_SW, ICE_SID_XLT_KEY_BUILDER_ACL, + * ICE_SID_XLT_KEY_BUILDER_FD and ICE_SID_XLT_KEY_BUILDER_RSS + * sections ***/ +static void ice_xlt_kb_entry_dump(struct ice_hw *hw, + struct ice_xlt_kb_entry *entry, int idx) +{ + struct device *dev = ice_hw_to_dev(hw); + int i; + + dev_info(dev, "key builder entry %d\n", idx); + dev_info(dev, "\txlt1_ad_sel = %d\n", entry->xlt1_ad_sel); + dev_info(dev, "\txlt2_ad_sel = %d\n", entry->xlt2_ad_sel); + + for (i = 0; i < ICE_XLT_KB_FLAG0_14_CNT; i++) + dev_info(dev, "\tflg%d_sel = %d\n", i, entry->flg0_14_sel[i]); + + dev_info(dev, "\txlt1_md_sel = %d\n", entry->xlt1_md_sel); + dev_info(dev, "\txlt2_md_sel = %d\n", entry->xlt2_md_sel); +} + +/** + * ice_xlt_kb_dump - dump a xlt key build info + * @hw: pointer to the hardware structure + * @kb: key build to dump + */ +static void ice_xlt_kb_dump(struct ice_hw *hw, struct ice_xlt_kb *kb) +{ + struct device *dev = ice_hw_to_dev(hw); + int i; + + dev_info(dev, "xlt1_pm = %d\n", kb->xlt1_pm); + dev_info(dev, "xlt2_pm = %d\n", kb->xlt2_pm); + dev_info(dev, "prof_id_pm = %d\n", kb->prof_id_pm); + dev_info(dev, "flag15 lo = 0x%08x\n", (u32)kb->flag15); + dev_info(dev, "flag15 hi = 0x%08x\n", + (u32)(kb->flag15 >> (sizeof(u32) * BITS_PER_BYTE))); + + for (i = 0; i < ICE_XLT_KB_TBL_CNT; i++) + ice_xlt_kb_entry_dump(hw, &kb->entries[i], i); +} + +#define ICE_XLT_KB_X1AS_S 32 /* offset for the 1st 64-bits field */ +#define ICE_XLT_KB_X1AS_IDD (ICE_XLT_KB_X1AS_S / BITS_PER_BYTE) +#define ICE_XLT_KB_X1AS_OFF (ICE_XLT_KB_X1AS_S % BITS_PER_BYTE) +#define ICE_XLT_KB_X1AS GENMASK_ULL(34 - ICE_XLT_KB_X1AS_S, \ + 32 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_X2AS GENMASK_ULL(37 - ICE_XLT_KB_X1AS_S, \ + 35 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_FL00 GENMASK_ULL(46 - ICE_XLT_KB_X1AS_S, \ + 38 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_FL01 GENMASK_ULL(55 - ICE_XLT_KB_X1AS_S, \ + 47 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_FL02 GENMASK_ULL(64 - ICE_XLT_KB_X1AS_S, \ + 56 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_FL03 GENMASK_ULL(73 - ICE_XLT_KB_X1AS_S, \ + 65 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_FL04 GENMASK_ULL(82 - ICE_XLT_KB_X1AS_S, \ + 74 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_FL05 GENMASK_ULL(91 - ICE_XLT_KB_X1AS_S, \ + 83 - ICE_XLT_KB_X1AS_S) +#define ICE_XLT_KB_FL06_S 92 /* offset for the 2nd 64-bits field */ +#define ICE_XLT_KB_FL06_IDD (ICE_XLT_KB_FL06_S / BITS_PER_BYTE) +#define ICE_XLT_KB_FL06_OFF (ICE_XLT_KB_FL06_S % BITS_PER_BYTE) +#define ICE_XLT_KB_FL06 GENMASK_ULL(100 - ICE_XLT_KB_FL06_S, \ + 92 - ICE_XLT_KB_FL06_S) +#define ICE_XLT_KB_FL07 GENMASK_ULL(109 - ICE_XLT_KB_FL06_S, \ + 101 - ICE_XLT_KB_FL06_S) +#define ICE_XLT_KB_FL08 GENMASK_ULL(118 - ICE_XLT_KB_FL06_S, \ + 110 - ICE_XLT_KB_FL06_S) +#define ICE_XLT_KB_FL09 GENMASK_ULL(127 - ICE_XLT_KB_FL06_S, \ + 119 - ICE_XLT_KB_FL06_S) +#define ICE_XLT_KB_FL10 GENMASK_ULL(136 - ICE_XLT_KB_FL06_S, \ + 128 - ICE_XLT_KB_FL06_S) +#define ICE_XLT_KB_FL11 GENMASK_ULL(145 - ICE_XLT_KB_FL06_S, \ + 137 - ICE_XLT_KB_FL06_S) +#define ICE_XLT_KB_FL12_S 146 /* offset for the 3rd 64-bits field */ +#define ICE_XLT_KB_FL12_IDD (ICE_XLT_KB_FL12_S / BITS_PER_BYTE) +#define ICE_XLT_KB_FL12_OFF (ICE_XLT_KB_FL12_S % BITS_PER_BYTE) +#define ICE_XLT_KB_FL12 
GENMASK_ULL(154 - ICE_XLT_KB_FL12_S, \ + 146 - ICE_XLT_KB_FL12_S) +#define ICE_XLT_KB_FL13 GENMASK_ULL(163 - ICE_XLT_KB_FL12_S, \ + 155 - ICE_XLT_KB_FL12_S) +#define ICE_XLT_KB_FL14 GENMASK_ULL(181 - ICE_XLT_KB_FL12_S, \ + 164 - ICE_XLT_KB_FL12_S) +#define ICE_XLT_KB_X1MS GENMASK_ULL(186 - ICE_XLT_KB_FL12_S, \ + 182 - ICE_XLT_KB_FL12_S) +#define ICE_XLT_KB_X2MS GENMASK_ULL(191 - ICE_XLT_KB_FL12_S, \ + 187 - ICE_XLT_KB_FL12_S) + +/** + * ice_kb_entry_init - parse 192 bits of XLT Key Builder entry + * @entry: pointer to the XLT Key Builder entry structure + * @data: XLT Key Builder entry data to be parsed + */ +static void ice_kb_entry_init(struct ice_xlt_kb_entry *entry, u8 *data) +{ + u8 i = 0; + u64 d64; + + d64 = *((u64 *)&data[ICE_XLT_KB_X1AS_IDD]) >> ICE_XLT_KB_X1AS_OFF; + + entry->xlt1_ad_sel = FIELD_GET(ICE_XLT_KB_X1AS, d64); + entry->xlt2_ad_sel = FIELD_GET(ICE_XLT_KB_X2AS, d64); + + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL00, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL01, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL02, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL03, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL04, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL05, d64); + + d64 = *((u64 *)&data[ICE_XLT_KB_FL06_IDD]) >> ICE_XLT_KB_FL06_OFF; + + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL06, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL07, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL08, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL09, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL10, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL11, d64); + + d64 = *((u64 *)&data[ICE_XLT_KB_FL12_IDD]) >> ICE_XLT_KB_FL12_OFF; + + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL12, d64); + entry->flg0_14_sel[i++] = FIELD_GET(ICE_XLT_KB_FL13, d64); + entry->flg0_14_sel[i] = FIELD_GET(ICE_XLT_KB_FL14, d64); + + entry->xlt1_md_sel = FIELD_GET(ICE_XLT_KB_X1MS, d64); + entry->xlt2_md_sel = FIELD_GET(ICE_XLT_KB_X2MS, d64); +} + +#define ICE_XLT_KB_X1PM_OFF 0 +#define ICE_XLT_KB_X2PM_OFF 1 +#define ICE_XLT_KB_PIPM_OFF 2 +#define ICE_XLT_KB_FL15_OFF 4 +#define ICE_XLT_KB_TBL_OFF 12 + +/** + * ice_parse_kb_data - parse 204 bits of XLT Key Build Table + * @hw: pointer to the hardware structure + * @kb: pointer to the XLT Key Build Table structure + * @data: XLT Key Build Table data to be parsed + */ +static void ice_parse_kb_data(struct ice_hw *hw, struct ice_xlt_kb *kb, + void *data) +{ + u8 *buf = data; + int i; + + kb->xlt1_pm = buf[ICE_XLT_KB_X1PM_OFF]; + kb->xlt2_pm = buf[ICE_XLT_KB_X2PM_OFF]; + kb->prof_id_pm = buf[ICE_XLT_KB_PIPM_OFF]; + + kb->flag15 = *(u64 *)&buf[ICE_XLT_KB_FL15_OFF]; + for (i = 0; i < ICE_XLT_KB_TBL_CNT; i++) + ice_kb_entry_init(&kb->entries[i], + &buf[ICE_XLT_KB_TBL_OFF + + i * ICE_XLT_KB_TBL_ENTRY_SIZE]); + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_xlt_kb_dump(hw, kb); +} + +static struct ice_xlt_kb *ice_xlt_kb_get(struct ice_hw *hw, u32 sect_type) +{ + struct ice_pkg_enum state = {}; + struct ice_seg *seg = hw->seg; + struct ice_xlt_kb *kb; + void *data; + + if (!seg) + return ERR_PTR(-EINVAL); + + kb = kzalloc(sizeof(*kb), GFP_KERNEL); + if (!kb) + return ERR_PTR(-ENOMEM); + + data = ice_pkg_enum_section(seg, &state, sect_type); + if (!data) { + ice_debug(hw, ICE_DBG_PARSER, "failed to find section type %d.\n", + sect_type); + kfree(kb); + return ERR_PTR(-EINVAL); + } + + ice_parse_kb_data(hw, kb, data); + + return kb; +} + +/** + * 
ice_xlt_kb_get_sw - create switch xlt key build + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Key Builder table for Switch. + */ +static struct ice_xlt_kb *ice_xlt_kb_get_sw(struct ice_hw *hw) +{ + return ice_xlt_kb_get(hw, ICE_SID_XLT_KEY_BUILDER_SW); +} + +/** + * ice_xlt_kb_get_acl - create acl xlt key build + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Key Builder table for ACL. + */ +static struct ice_xlt_kb *ice_xlt_kb_get_acl(struct ice_hw *hw) +{ + return ice_xlt_kb_get(hw, ICE_SID_XLT_KEY_BUILDER_ACL); +} + +/** + * ice_xlt_kb_get_fd - create fdir xlt key build + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Key Builder table for Flow Director. + */ +static struct ice_xlt_kb *ice_xlt_kb_get_fd(struct ice_hw *hw) +{ + return ice_xlt_kb_get(hw, ICE_SID_XLT_KEY_BUILDER_FD); +} + +/** + * ice_xlt_kb_get_rss - create rss xlt key build + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated Key Builder table for RSS. + */ +static struct ice_xlt_kb *ice_xlt_kb_get_rss(struct ice_hw *hw) +{ + return ice_xlt_kb_get(hw, ICE_SID_XLT_KEY_BUILDER_RSS); +} + +#define ICE_XLT_KB_MASK GENMASK_ULL(5, 0) + +/** + * ice_xlt_kb_flag_get - aggregate 64 bits packet flag into 16 bits xlt flag + * @kb: xlt key build + * @pkt_flag: 64 bits packet flag + * + * Return: XLT flag or 0 if @pkt_flag = 0. + */ +u16 ice_xlt_kb_flag_get(struct ice_xlt_kb *kb, u64 pkt_flag) +{ + struct ice_xlt_kb_entry *entry = &kb->entries[0]; + u16 flag = 0; + int i; + + /* check flag 15 */ + if (kb->flag15 & pkt_flag) + flag = BIT(ICE_XLT_KB_FLAG0_14_CNT); + + /* check flag 0 - 14 */ + for (i = 0; i < ICE_XLT_KB_FLAG0_14_CNT; i++) { + /* only check first entry */ + u16 idx = entry->flg0_14_sel[i] & ICE_XLT_KB_MASK; + + if (pkt_flag & BIT(idx)) + flag |= (u16)BIT(i); + } + + return flag; +} + +/*** Parser API ***/ +/** + * ice_parser_create - create a parser instance + * @hw: pointer to the hardware structure + * + * Return: a pointer to the allocated parser instance or ERR_PTR + * in case of error. 
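+ *
+ * The returned pointer must be checked with IS_ERR(); a successfully
+ * created instance is released with ice_parser_destroy().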
+ */ +struct ice_parser *ice_parser_create(struct ice_hw *hw) +{ + struct ice_parser *p; + void *err; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + p->hw = hw; + p->rt.psr = p; + + p->imem_table = ice_imem_table_get(hw); + if (IS_ERR(p->imem_table)) { + err = p->imem_table; + goto err; + } + + p->mi_table = ice_metainit_table_get(hw); + if (IS_ERR(p->mi_table)) { + err = p->mi_table; + goto err; + } + + p->pg_cam_table = ice_pg_cam_table_get(hw); + if (IS_ERR(p->pg_cam_table)) { + err = p->pg_cam_table; + goto err; + } + + p->pg_sp_cam_table = ice_pg_sp_cam_table_get(hw); + if (IS_ERR(p->pg_sp_cam_table)) { + err = p->pg_sp_cam_table; + goto err; + } + + p->pg_nm_cam_table = ice_pg_nm_cam_table_get(hw); + if (IS_ERR(p->pg_nm_cam_table)) { + err = p->pg_nm_cam_table; + goto err; + } + + p->pg_nm_sp_cam_table = ice_pg_nm_sp_cam_table_get(hw); + if (IS_ERR(p->pg_nm_sp_cam_table)) { + err = p->pg_nm_sp_cam_table; + goto err; + } + + p->bst_tcam_table = ice_bst_tcam_table_get(hw); + if (IS_ERR(p->bst_tcam_table)) { + err = p->bst_tcam_table; + goto err; + } + + p->bst_lbl_table = ice_bst_lbl_table_get(hw); + if (IS_ERR(p->bst_lbl_table)) { + err = p->bst_lbl_table; + goto err; + } + + p->ptype_mk_tcam_table = ice_ptype_mk_tcam_table_get(hw); + if (IS_ERR(p->ptype_mk_tcam_table)) { + err = p->ptype_mk_tcam_table; + goto err; + } + + p->mk_grp_table = ice_mk_grp_table_get(hw); + if (IS_ERR(p->mk_grp_table)) { + err = p->mk_grp_table; + goto err; + } + + p->proto_grp_table = ice_proto_grp_table_get(hw); + if (IS_ERR(p->proto_grp_table)) { + err = p->proto_grp_table; + goto err; + } + + p->flg_rd_table = ice_flg_rd_table_get(hw); + if (IS_ERR(p->flg_rd_table)) { + err = p->flg_rd_table; + goto err; + } + + p->xlt_kb_sw = ice_xlt_kb_get_sw(hw); + if (IS_ERR(p->xlt_kb_sw)) { + err = p->xlt_kb_sw; + goto err; + } + + p->xlt_kb_acl = ice_xlt_kb_get_acl(hw); + if (IS_ERR(p->xlt_kb_acl)) { + err = p->xlt_kb_acl; + goto err; + } + + p->xlt_kb_fd = ice_xlt_kb_get_fd(hw); + if (IS_ERR(p->xlt_kb_fd)) { + err = p->xlt_kb_fd; + goto err; + } + + p->xlt_kb_rss = ice_xlt_kb_get_rss(hw); + if (IS_ERR(p->xlt_kb_rss)) { + err = p->xlt_kb_rss; + goto err; + } + + return p; +err: + ice_parser_destroy(p); + return err; +} + +/** + * ice_parser_destroy - destroy a parser instance + * @psr: pointer to a parser instance + */ +void ice_parser_destroy(struct ice_parser *psr) +{ + kfree(psr->imem_table); + kfree(psr->mi_table); + kfree(psr->pg_cam_table); + kfree(psr->pg_sp_cam_table); + kfree(psr->pg_nm_cam_table); + kfree(psr->pg_nm_sp_cam_table); + kfree(psr->bst_tcam_table); + kfree(psr->bst_lbl_table); + kfree(psr->ptype_mk_tcam_table); + kfree(psr->mk_grp_table); + kfree(psr->proto_grp_table); + kfree(psr->flg_rd_table); + kfree(psr->xlt_kb_sw); + kfree(psr->xlt_kb_acl); + kfree(psr->xlt_kb_fd); + kfree(psr->xlt_kb_rss); + + kfree(psr); +} + +/** + * ice_parser_run - parse on a packet in binary and return the result + * @psr: pointer to a parser instance + * @pkt_buf: packet data + * @pkt_len: packet length + * @rslt: input/output parameter to save parser result. + * + * Return: 0 on success or errno. 
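+ *
+ * A minimal usage sketch (hypothetical caller; assumes a DDP package
+ * has been downloaded so the parser tables can be created):
+ *
+ *    struct ice_parser_result rslt;
+ *    struct ice_parser *psr;
+ *
+ *    psr = ice_parser_create(hw);
+ *    if (IS_ERR(psr))
+ *        return PTR_ERR(psr);
+ *
+ *    if (!ice_parser_run(psr, pkt, pkt_len, &rslt))
+ *        ice_parser_result_dump(hw, &rslt);
+ *
+ *    ice_parser_destroy(psr);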
+ */ +int ice_parser_run(struct ice_parser *psr, const u8 *pkt_buf, + int pkt_len, struct ice_parser_result *rslt) +{ + ice_parser_rt_reset(&psr->rt); + ice_parser_rt_pktbuf_set(&psr->rt, pkt_buf, pkt_len); + + return ice_parser_rt_execute(&psr->rt, rslt); +} + +/** + * ice_parser_result_dump - dump a parser result info + * @hw: pointer to the hardware structure + * @rslt: parser result info to dump + */ +void ice_parser_result_dump(struct ice_hw *hw, struct ice_parser_result *rslt) +{ + struct device *dev = ice_hw_to_dev(hw); + int i; + + dev_info(dev, "ptype = %d\n", rslt->ptype); + for (i = 0; i < rslt->po_num; i++) + dev_info(dev, "proto = %d, offset = %d\n", + rslt->po[i].proto_id, rslt->po[i].offset); + + dev_info(dev, "flags_psr = 0x%016llx\n", rslt->flags_psr); + dev_info(dev, "flags_pkt = 0x%016llx\n", rslt->flags_pkt); + dev_info(dev, "flags_sw = 0x%04x\n", rslt->flags_sw); + dev_info(dev, "flags_fd = 0x%04x\n", rslt->flags_fd); + dev_info(dev, "flags_rss = 0x%04x\n", rslt->flags_rss); +} + +#define ICE_BT_VLD_KEY 0xFF +#define ICE_BT_INV_KEY 0xFE + +static void ice_bst_dvm_set(struct ice_parser *psr, enum ice_lbl_type type, + bool on) +{ + u16 i = 0; + + while (true) { + struct ice_bst_tcam_item *item; + u8 key; + + item = ice_bst_tcam_search(psr->bst_tcam_table, + psr->bst_lbl_table, + type, &i); + if (!item) + break; + + key = on ? ICE_BT_VLD_KEY : ICE_BT_INV_KEY; + item->key[ICE_BT_VM_OFF] = key; + item->key_inv[ICE_BT_VM_OFF] = key; + i++; + } +} + +/** + * ice_parser_dvm_set - configure double vlan mode for parser + * @psr: pointer to a parser instance + * @on: true to turn on; false to turn off + */ +void ice_parser_dvm_set(struct ice_parser *psr, bool on) +{ + ice_bst_dvm_set(psr, ICE_LBL_BST_TYPE_DVM, on); + ice_bst_dvm_set(psr, ICE_LBL_BST_TYPE_SVM, !on); +} + +static int ice_tunnel_port_set(struct ice_parser *psr, enum ice_lbl_type type, + u16 udp_port, bool on) +{ + u8 *buf = (u8 *)&udp_port; + u16 i = 0; + + while (true) { + struct ice_bst_tcam_item *item; + + item = ice_bst_tcam_search(psr->bst_tcam_table, + psr->bst_lbl_table, + type, &i); + if (!item) + break; + + /* found empty slot to add */ + if (on && item->key[ICE_BT_TUN_PORT_OFF_H] == ICE_BT_INV_KEY && + item->key_inv[ICE_BT_TUN_PORT_OFF_H] == ICE_BT_INV_KEY) { + item->key_inv[ICE_BT_TUN_PORT_OFF_L] = + buf[ICE_UDP_PORT_OFF_L]; + item->key_inv[ICE_BT_TUN_PORT_OFF_H] = + buf[ICE_UDP_PORT_OFF_H]; + + item->key[ICE_BT_TUN_PORT_OFF_L] = + ICE_BT_VLD_KEY - buf[ICE_UDP_PORT_OFF_L]; + item->key[ICE_BT_TUN_PORT_OFF_H] = + ICE_BT_VLD_KEY - buf[ICE_UDP_PORT_OFF_H]; + + return 0; + /* found a matched slot to delete */ + } else if (!on && + (item->key_inv[ICE_BT_TUN_PORT_OFF_L] == + buf[ICE_UDP_PORT_OFF_L] || + item->key_inv[ICE_BT_TUN_PORT_OFF_H] == + buf[ICE_UDP_PORT_OFF_H])) { + item->key_inv[ICE_BT_TUN_PORT_OFF_L] = ICE_BT_VLD_KEY; + item->key_inv[ICE_BT_TUN_PORT_OFF_H] = ICE_BT_INV_KEY; + + item->key[ICE_BT_TUN_PORT_OFF_L] = ICE_BT_VLD_KEY; + item->key[ICE_BT_TUN_PORT_OFF_H] = ICE_BT_INV_KEY; + + return 0; + } + i++; + } + + return -EINVAL; +} + +/** + * ice_parser_vxlan_tunnel_set - configure vxlan tunnel for parser + * @psr: pointer to a parser instance + * @udp_port: vxlan tunnel port in UDP header + * @on: true to turn on; false to turn off + * + * Return: 0 on success or errno on failure. 
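+ *
+ * Usage sketch (4789 is the IANA-assigned VXLAN port, used here only
+ * as an example value):
+ *
+ *    err = ice_parser_vxlan_tunnel_set(psr, 4789, true);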
+ */ +int ice_parser_vxlan_tunnel_set(struct ice_parser *psr, + u16 udp_port, bool on) +{ + return ice_tunnel_port_set(psr, ICE_LBL_BST_TYPE_VXLAN, udp_port, on); +} + +/** + * ice_parser_geneve_tunnel_set - configure geneve tunnel for parser + * @psr: pointer to a parser instance + * @udp_port: geneve tunnel port in UDP header + * @on: true to turn on; false to turn off + * + * Return: 0 on success or errno on failure. + */ +int ice_parser_geneve_tunnel_set(struct ice_parser *psr, + u16 udp_port, bool on) +{ + return ice_tunnel_port_set(psr, ICE_LBL_BST_TYPE_GENEVE, udp_port, on); +} + +/** + * ice_parser_ecpri_tunnel_set - configure ecpri tunnel for parser + * @psr: pointer to a parser instance + * @udp_port: ecpri tunnel port in UDP header + * @on: true to turn on; false to turn off + * + * Return: 0 on success or errno on failure. + */ +int ice_parser_ecpri_tunnel_set(struct ice_parser *psr, + u16 udp_port, bool on) +{ + return ice_tunnel_port_set(psr, ICE_LBL_BST_TYPE_UDP_ECPRI, + udp_port, on); +} + +/** + * ice_nearest_proto_id - find nearest protocol ID + * @rslt: pointer to a parser result instance + * @offset: packet-buffer offset to search back from + * @proto_id: the protocol ID (output) + * @proto_off: the offset relative to the protocol header (output) + * + * From the protocols in @rslt, find the nearest protocol whose header offset + * does not exceed @offset. + * + * Return: true on success; the protocol ID and relative offset are written + * to @proto_id and @proto_off. + */ +static bool ice_nearest_proto_id(struct ice_parser_result *rslt, u16 offset, + u8 *proto_id, u16 *proto_off) +{ + u16 dist = U16_MAX; + u8 proto = 0; + int i; + + for (i = 0; i < rslt->po_num; i++) { + if (offset < rslt->po[i].offset) + continue; + if (offset - rslt->po[i].offset < dist) { + proto = rslt->po[i].proto_id; + dist = offset - rslt->po[i].offset; + } + } + + if (dist % 2) + return false; + + *proto_id = proto; + *proto_off = dist; + + return true; +} + +/* Default flag masks to cover GTP_EH_PDU, GTP_EH_PDU_LINK and TUN2. + * In the future, these flag masks should be learned from the DDP package. + */ +#define ICE_KEYBUILD_FLAG_MASK_DEFAULT_SW 0x4002 +#define ICE_KEYBUILD_FLAG_MASK_DEFAULT_ACL 0x0000 +#define ICE_KEYBUILD_FLAG_MASK_DEFAULT_FD 0x6080 +#define ICE_KEYBUILD_FLAG_MASK_DEFAULT_RSS 0x6010 + +/** + * ice_parser_profile_init - initialize an FXP profile based on parser result + * @rslt: an instance of a parser result + * @pkt_buf: packet data buffer + * @msk_buf: packet mask buffer + * @buf_len: packet buffer length + * @blk: FXP pipeline stage + * @prof: input/output parameter to save the profile + * + * Return: 0 on success or errno on failure.
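The extraction loop that follows scans the mask buffer one byte at a time and, wherever a 16-bit window has any mask bit set, records one field vector of (protocol ID, in-header offset, spec, mask). A condensed standalone sketch of that walk; the nearest-protocol lookup is stubbed out as a callback, and memcpy() stands in for the driver's raw u16 loads to avoid unaligned access:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct fv_entry {
	uint8_t  proto_id;
	uint16_t offset;	/* relative to the protocol header */
	uint16_t spec;
	uint16_t msk;
};

/* lookup() reports the protocol covering packet offset @off */
typedef bool (*proto_lookup_t)(uint16_t off, uint8_t *id, uint16_t *po);

static int build_fv(const uint8_t *pkt, const uint8_t *msk, int len,
		    struct fv_entry *fv, int max, proto_lookup_t lookup)
{
	int n = 0;

	for (int off = 0; off < len - 1; off++) {
		uint8_t id;
		uint16_t po;

		if (!msk[off] && !msk[off + 1])
			continue;		/* nothing to match here */
		if (!lookup((uint16_t)off, &id, &po))
			continue;		/* no covering protocol */
		if (n >= max)
			return -1;		/* profile is full */

		fv[n].proto_id = id;
		fv[n].offset = po;
		memcpy(&fv[n].spec, &pkt[off], 2);
		memcpy(&fv[n].msk, &msk[off], 2);
		n++;
	}
	return n;
}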
+ */ +int ice_parser_profile_init(struct ice_parser_result *rslt, + const u8 *pkt_buf, const u8 *msk_buf, + int buf_len, enum ice_block blk, + struct ice_parser_profile *prof) +{ + u8 proto_id = U8_MAX; + u16 proto_off = 0; + u16 off; + + memset(prof, 0, sizeof(*prof)); + set_bit(rslt->ptype, prof->ptypes); + if (blk == ICE_BLK_SW) { + prof->flags = rslt->flags_sw; + prof->flags_msk = ICE_KEYBUILD_FLAG_MASK_DEFAULT_SW; + } else if (blk == ICE_BLK_ACL) { + prof->flags = rslt->flags_acl; + prof->flags_msk = ICE_KEYBUILD_FLAG_MASK_DEFAULT_ACL; + } else if (blk == ICE_BLK_FD) { + prof->flags = rslt->flags_fd; + prof->flags_msk = ICE_KEYBUILD_FLAG_MASK_DEFAULT_FD; + } else if (blk == ICE_BLK_RSS) { + prof->flags = rslt->flags_rss; + prof->flags_msk = ICE_KEYBUILD_FLAG_MASK_DEFAULT_RSS; + } else { + return -EINVAL; + } + + for (off = 0; off < buf_len - 1; off++) { + if (msk_buf[off] == 0 && msk_buf[off + 1] == 0) + continue; + if (!ice_nearest_proto_id(rslt, off, &proto_id, &proto_off)) + continue; + if (prof->fv_num >= ICE_PARSER_FV_MAX) + return -EINVAL; + + prof->fv[prof->fv_num].proto_id = proto_id; + prof->fv[prof->fv_num].offset = proto_off; + prof->fv[prof->fv_num].spec = *(const u16 *)&pkt_buf[off]; + prof->fv[prof->fv_num].msk = *(const u16 *)&msk_buf[off]; + prof->fv_num++; + } + + return 0; +} + +/** + * ice_parser_profile_dump - dump an FXP profile info + * @hw: pointer to the hardware structure + * @prof: profile info to dump + */ +void ice_parser_profile_dump(struct ice_hw *hw, + struct ice_parser_profile *prof) +{ + struct device *dev = ice_hw_to_dev(hw); + u16 i; + + dev_info(dev, "ptypes:\n"); + for (i = 0; i < ICE_FLOW_PTYPE_MAX; i++) + if (test_bit(i, prof->ptypes)) + dev_info(dev, "\t%u\n", i); + + for (i = 0; i < prof->fv_num; i++) + dev_info(dev, "proto = %u, offset = %2u, spec = 0x%04x, mask = 0x%04x\n", + prof->fv[i].proto_id, prof->fv[i].offset, + prof->fv[i].spec, prof->fv[i].msk); + + dev_info(dev, "flags = 0x%04x\n", prof->flags); + dev_info(dev, "flags_msk = 0x%04x\n", prof->flags_msk); +} diff --git a/drivers/net/ethernet/intel/ice/ice_parser.h b/drivers/net/ethernet/intel/ice/ice_parser.h new file mode 100644 index 000000000000..6509d807627c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_parser.h @@ -0,0 +1,540 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef _ICE_PARSER_H_ +#define _ICE_PARSER_H_ + +#define ICE_SEC_DATA_OFFSET 4 +#define ICE_SID_RXPARSER_IMEM_ENTRY_SIZE 48 +#define ICE_SID_RXPARSER_METADATA_INIT_ENTRY_SIZE 24 +#define ICE_SID_RXPARSER_CAM_ENTRY_SIZE 16 +#define ICE_SID_RXPARSER_PG_SPILL_ENTRY_SIZE 17 +#define ICE_SID_RXPARSER_NOMATCH_CAM_ENTRY_SIZE 12 +#define ICE_SID_RXPARSER_NOMATCH_SPILL_ENTRY_SIZE 13 +#define ICE_SID_RXPARSER_BOOST_TCAM_ENTRY_SIZE 88 +#define ICE_SID_RXPARSER_MARKER_TYPE_ENTRY_SIZE 24 +#define ICE_SID_RXPARSER_MARKER_GRP_ENTRY_SIZE 8 +#define ICE_SID_RXPARSER_PROTO_GRP_ENTRY_SIZE 24 +#define ICE_SID_RXPARSER_FLAG_REDIR_ENTRY_SIZE 1 + +#define ICE_SEC_LBL_DATA_OFFSET 2 +#define ICE_SID_LBL_ENTRY_SIZE 66 + +/*** ICE_SID_RXPARSER_IMEM section ***/ +#define ICE_IMEM_TABLE_SIZE 192 + +/* TCAM boost Master; if bit is set, and TCAM hit, TCAM output overrides iMEM + * output. 
+ */ +struct ice_bst_main { + bool alu0; + bool alu1; + bool alu2; + bool pg; +}; + +struct ice_bst_keybuilder { + u8 prio; /* 0-3: PG precedence within ALUs (3 highest) */ + bool tsr_ctrl; /* TCAM Search Register control */ +}; + +/* Next protocol Key builder */ +struct ice_np_keybuilder { + u8 opc; + u8 start_reg0; + u8 len_reg1; +}; + +enum ice_np_keybuilder_opcode { + ICE_NPKB_OPC_EXTRACT = 0, + ICE_NPKB_OPC_BUILD = 1, + ICE_NPKB_OPC_BYPASS = 2, +}; + +/* Parse Graph Key builder */ +struct ice_pg_keybuilder { + bool flag0_ena; + bool flag1_ena; + bool flag2_ena; + bool flag3_ena; + u8 flag0_idx; + u8 flag1_idx; + u8 flag2_idx; + u8 flag3_idx; + u8 alu_reg_idx; +}; + +enum ice_alu_idx { + ICE_ALU0_IDX = 0, + ICE_ALU1_IDX = 1, + ICE_ALU2_IDX = 2, +}; + +enum ice_alu_opcode { + ICE_ALU_PARK = 0, + ICE_ALU_MOV_ADD = 1, + ICE_ALU_ADD = 2, + ICE_ALU_MOV_AND = 4, + ICE_ALU_AND = 5, + ICE_ALU_AND_IMM = 6, + ICE_ALU_MOV_OR = 7, + ICE_ALU_OR = 8, + ICE_ALU_MOV_XOR = 9, + ICE_ALU_XOR = 10, + ICE_ALU_NOP = 11, + ICE_ALU_BR = 12, + ICE_ALU_BREQ = 13, + ICE_ALU_BRNEQ = 14, + ICE_ALU_BRGT = 15, + ICE_ALU_BRLT = 16, + ICE_ALU_BRGEQ = 17, + ICE_ALU_BRLEG = 18, + ICE_ALU_SETEQ = 19, + ICE_ALU_ANDEQ = 20, + ICE_ALU_OREQ = 21, + ICE_ALU_SETNEQ = 22, + ICE_ALU_ANDNEQ = 23, + ICE_ALU_ORNEQ = 24, + ICE_ALU_SETGT = 25, + ICE_ALU_ANDGT = 26, + ICE_ALU_ORGT = 27, + ICE_ALU_SETLT = 28, + ICE_ALU_ANDLT = 29, + ICE_ALU_ORLT = 30, + ICE_ALU_MOV_SUB = 31, + ICE_ALU_SUB = 32, + ICE_ALU_INVALID = 64, +}; + +enum ice_proto_off_opcode { + ICE_PO_OFF_REMAIN = 0, + ICE_PO_OFF_HDR_ADD = 1, + ICE_PO_OFF_HDR_SUB = 2, +}; + +struct ice_alu { + enum ice_alu_opcode opc; + u8 src_start; + u8 src_len; + bool shift_xlate_sel; + u8 shift_xlate_key; + u8 src_reg_id; + u8 dst_reg_id; + bool inc0; + bool inc1; + u8 proto_offset_opc; + u8 proto_offset; + u8 branch_addr; + u16 imm; + bool dedicate_flags_ena; + u8 dst_start; + u8 dst_len; + bool flags_extr_imm; + u8 flags_start_imm; +}; + +/* Parser program code (iMEM) */ +struct ice_imem_item { + u16 idx; + struct ice_bst_main b_m; + struct ice_bst_keybuilder b_kb; + u8 pg_prio; + struct ice_np_keybuilder np_kb; + struct ice_pg_keybuilder pg_kb; + struct ice_alu alu0; + struct ice_alu alu1; + struct ice_alu alu2; +}; + +/*** ICE_SID_RXPARSER_METADATA_INIT section ***/ +#define ICE_METAINIT_TABLE_SIZE 16 + +/* Metadata Initialization item */ +struct ice_metainit_item { + u16 idx; + + u8 tsr; /* TCAM Search key Register */ + u16 ho; /* Header Offset register */ + u16 pc; /* Program Counter register */ + u16 pg_rn; /* Parse Graph Root Node */ + u8 cd; /* Control Domain ID */ + + /* General Purpose Registers */ + bool gpr_a_ctrl; + u8 gpr_a_data_mdid; + u8 gpr_a_data_start; + u8 gpr_a_data_len; + u8 gpr_a_id; + + bool gpr_b_ctrl; + u8 gpr_b_data_mdid; + u8 gpr_b_data_start; + u8 gpr_b_data_len; + u8 gpr_b_id; + + bool gpr_c_ctrl; + u8 gpr_c_data_mdid; + u8 gpr_c_data_start; + u8 gpr_c_data_len; + u8 gpr_c_id; + + bool gpr_d_ctrl; + u8 gpr_d_data_mdid; + u8 gpr_d_data_start; + u8 gpr_d_data_len; + u8 gpr_d_id; + + u64 flags; /* Initial value for all flags */ +}; + +/*** ICE_SID_RXPARSER_CAM, ICE_SID_RXPARSER_PG_SPILL, + * ICE_SID_RXPARSER_NOMATCH_CAM and ICE_SID_RXPARSER_NOMATCH_CAM + * sections ***/ +#define ICE_PG_CAM_TABLE_SIZE 2048 +#define ICE_PG_SP_CAM_TABLE_SIZE 128 +#define ICE_PG_NM_CAM_TABLE_SIZE 1024 +#define ICE_PG_NM_SP_CAM_TABLE_SIZE 64 + +struct ice_pg_cam_key { + bool valid; + struct_group_attr(val, __packed, + u16 node_id; /* Node ID of protocol in parse graph */ + bool 
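The struct_group_attr(val, __packed, ...) wrapper used in the CAM keys here is what lets a lookup compare the whole match tuple as raw bytes while the rest of the code keeps using named fields. A plausible shape for that compare, assuming the match helpers declared below do a bytewise comparison of the grouped region:

#include <linux/string.h>
#include <linux/types.h>

/* sketch only: compare the packed 'val' group, skipping 'valid' */
static bool ice_pg_cam_key_equal(const struct ice_pg_cam_key *a,
				 const struct ice_pg_cam_key *b)
{
	return !memcmp(&a->val, &b->val, sizeof(a->val));
}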
flag0; + bool flag1; + bool flag2; + bool flag3; + u8 boost_idx; /* Boost TCAM match index */ + u16 alu_reg; + u32 next_proto; /* next Protocol value (must be last) */ + ); +}; + +struct ice_pg_nm_cam_key { + bool valid; + struct_group_attr(val, __packed, + u16 node_id; + bool flag0; + bool flag1; + bool flag2; + bool flag3; + u8 boost_idx; + u16 alu_reg; + ); +}; + +struct ice_pg_cam_action { + u16 next_node; /* Parser Node ID for the next round */ + u8 next_pc; /* next Program Counter */ + bool is_pg; /* is protocol group */ + u8 proto_id; /* protocol ID or proto group ID */ + bool is_mg; /* is marker group */ + u8 marker_id; /* marker ID or marker group ID */ + bool is_last_round; + bool ho_polarity; /* header offset polarity */ + u16 ho_inc; +}; + +/* Parse Graph item */ +struct ice_pg_cam_item { + u16 idx; + struct ice_pg_cam_key key; + struct ice_pg_cam_action action; +}; + +/* Parse Graph No Match item */ +struct ice_pg_nm_cam_item { + u16 idx; + struct ice_pg_nm_cam_key key; + struct ice_pg_cam_action action; +}; + +struct ice_pg_cam_item *ice_pg_cam_match(struct ice_pg_cam_item *table, + int size, struct ice_pg_cam_key *key); +struct ice_pg_nm_cam_item * +ice_pg_nm_cam_match(struct ice_pg_nm_cam_item *table, int size, + struct ice_pg_cam_key *key); + +/*** ICE_SID_RXPARSER_BOOST_TCAM and ICE_SID_LBL_RXPARSER_TMEM sections ***/ +#define ICE_BST_TCAM_TABLE_SIZE 256 +#define ICE_BST_TCAM_KEY_SIZE 20 +#define ICE_BST_KEY_TCAM_SIZE 19 + +/* Boost TCAM item */ +struct ice_bst_tcam_item { + u16 addr; + u8 key[ICE_BST_TCAM_KEY_SIZE]; + u8 key_inv[ICE_BST_TCAM_KEY_SIZE]; + u8 hit_idx_grp; + u8 pg_prio; + struct ice_np_keybuilder np_kb; + struct ice_pg_keybuilder pg_kb; + struct ice_alu alu0; + struct ice_alu alu1; + struct ice_alu alu2; +}; + +#define ICE_LBL_LEN 64 +#define ICE_LBL_BST_DVM "BOOST_MAC_VLAN_DVM" +#define ICE_LBL_BST_SVM "BOOST_MAC_VLAN_SVM" +#define ICE_LBL_TNL_VXLAN "TNL_VXLAN" +#define ICE_LBL_TNL_GENEVE "TNL_GENEVE" +#define ICE_LBL_TNL_UDP_ECPRI "TNL_UDP_ECPRI" + +enum ice_lbl_type { + ICE_LBL_BST_TYPE_UNKNOWN, + ICE_LBL_BST_TYPE_DVM, + ICE_LBL_BST_TYPE_SVM, + ICE_LBL_BST_TYPE_VXLAN, + ICE_LBL_BST_TYPE_GENEVE, + ICE_LBL_BST_TYPE_UDP_ECPRI, +}; + +struct ice_lbl_item { + u16 idx; + char label[ICE_LBL_LEN]; + + /* must be at the end, not part of the DDP section */ + enum ice_lbl_type type; +}; + +struct ice_bst_tcam_item * +ice_bst_tcam_match(struct ice_bst_tcam_item *tcam_table, u8 *pat); +struct ice_bst_tcam_item * +ice_bst_tcam_search(struct ice_bst_tcam_item *tcam_table, + struct ice_lbl_item *lbl_table, + enum ice_lbl_type type, u16 *start); + +/*** ICE_SID_RXPARSER_MARKER_PTYPE section ***/ +#define ICE_PTYPE_MK_TCAM_TABLE_SIZE 1024 +#define ICE_PTYPE_MK_TCAM_KEY_SIZE 10 + +struct ice_ptype_mk_tcam_item { + u16 address; + u16 ptype; + u8 key[ICE_PTYPE_MK_TCAM_KEY_SIZE]; + u8 key_inv[ICE_PTYPE_MK_TCAM_KEY_SIZE]; +} __packed; + +struct ice_ptype_mk_tcam_item * +ice_ptype_mk_tcam_match(struct ice_ptype_mk_tcam_item *table, + u8 *pat, int len); +/*** ICE_SID_RXPARSER_MARKER_GRP section ***/ +#define ICE_MK_GRP_TABLE_SIZE 128 +#define ICE_MK_COUNT_PER_GRP 8 + +/* Marker Group item */ +struct ice_mk_grp_item { + int idx; + u8 markers[ICE_MK_COUNT_PER_GRP]; +}; + +/*** ICE_SID_RXPARSER_PROTO_GRP section ***/ +#define ICE_PROTO_COUNT_PER_GRP 8 +#define ICE_PROTO_GRP_TABLE_SIZE 192 +#define ICE_PROTO_GRP_ITEM_SIZE 22 +struct ice_proto_off { + bool polarity; /* true: positive, false: negative */ + u8 proto_id; + u16 offset; /* 10 bit protocol offset */ +}; + +/* Protocol 
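key[] and key_inv[] on the boost TCAM items above follow the key/key-inverse convention that the tunnel-port code earlier programs with key = 0xFF - value (the bitwise complement) and key_inv = value. Under that convention a bit position with both bits set is a wildcard and one with both bits clear can never match, which is why ICE_BT_INV_KEY (0xFE) disables a slot. The commit's real compare helper lives in another file of the series; a standalone rendering of the implied per-bit rules:

#include <stdbool.h>
#include <stdint.h>

static bool ternary_match_byte(uint8_t key, uint8_t key_inv, uint8_t pat)
{
	for (int i = 0; i < 8; i++) {
		bool k  = key & (1u << i);
		bool ki = key_inv & (1u << i);
		bool v  = pat & (1u << i);

		if (k && ki)
			continue;	/* wildcard position */
		if (!k && !ki)
			return false;	/* disabled position */
		if (v != ki)
			return false;	/* expected bit lives in key_inv */
	}
	return true;
}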
Group item */ +struct ice_proto_grp_item { + u16 idx; + struct ice_proto_off po[ICE_PROTO_COUNT_PER_GRP]; +}; + +/*** ICE_SID_RXPARSER_FLAG_REDIR section ***/ +#define ICE_FLG_RD_TABLE_SIZE 64 +#define ICE_FLG_RDT_SIZE 64 + +/* Flags Redirection item */ +struct ice_flg_rd_item { + u16 idx; + bool expose; + u8 intr_flg_id; /* Internal Flag ID */ +}; + +u64 ice_flg_redirect(struct ice_flg_rd_item *table, u64 psr_flg); + +/*** ICE_SID_XLT_KEY_BUILDER_SW, ICE_SID_XLT_KEY_BUILDER_ACL, + * ICE_SID_XLT_KEY_BUILDER_FD and ICE_SID_XLT_KEY_BUILDER_RSS + * sections ***/ +#define ICE_XLT_KB_FLAG0_14_CNT 15 +#define ICE_XLT_KB_TBL_CNT 8 +#define ICE_XLT_KB_TBL_ENTRY_SIZE 24 + +struct ice_xlt_kb_entry { + u8 xlt1_ad_sel; + u8 xlt2_ad_sel; + u16 flg0_14_sel[ICE_XLT_KB_FLAG0_14_CNT]; + u8 xlt1_md_sel; + u8 xlt2_md_sel; +}; + +/* XLT Key Builder */ +struct ice_xlt_kb { + u8 xlt1_pm; /* XLT1 Partition Mode */ + u8 xlt2_pm; /* XLT2 Partition Mode */ + u8 prof_id_pm; /* Profile ID Partition Mode */ + u64 flag15; + + struct ice_xlt_kb_entry entries[ICE_XLT_KB_TBL_CNT]; +}; + +u16 ice_xlt_kb_flag_get(struct ice_xlt_kb *kb, u64 pkt_flag); + +/*** Parser API ***/ +#define ICE_GPR_HV_IDX 64 +#define ICE_GPR_HV_SIZE 32 +#define ICE_GPR_ERR_IDX 84 +#define ICE_GPR_FLG_IDX 104 +#define ICE_GPR_FLG_SIZE 16 + +#define ICE_GPR_TSR_IDX 108 /* TSR: TCAM Search Register */ +#define ICE_GPR_NN_IDX 109 /* NN: Next Parsing Cycle Node ID */ +#define ICE_GPR_HO_IDX 110 /* HO: Next Parsing Cycle hdr Offset */ +#define ICE_GPR_NP_IDX 111 /* NP: Next Parsing Cycle */ + +#define ICE_PARSER_MAX_PKT_LEN 504 +#define ICE_PARSER_PKT_REV 32 +#define ICE_PARSER_GPR_NUM 128 +#define ICE_PARSER_FLG_NUM 64 +#define ICE_PARSER_ERR_NUM 16 +#define ICE_BST_KEY_SIZE 10 +#define ICE_MARKER_ID_SIZE 9 +#define ICE_MARKER_MAX_SIZE \ + (ICE_MARKER_ID_SIZE * BITS_PER_BYTE - 1) +#define ICE_MARKER_ID_NUM 8 +#define ICE_PO_PAIR_SIZE 256 + +struct ice_gpr_pu { + /* array of flags to indicate if GRP needs to be updated */ + bool gpr_val_upd[ICE_PARSER_GPR_NUM]; + u16 gpr_val[ICE_PARSER_GPR_NUM]; + u64 flg_msk; + u64 flg_val; + u16 err_msk; + u16 err_val; +}; + +enum ice_pg_prio { + ICE_PG_P0 = 0, + ICE_PG_P1 = 1, + ICE_PG_P2 = 2, + ICE_PG_P3 = 3, +}; + +struct ice_parser_rt { + struct ice_parser *psr; + u16 gpr[ICE_PARSER_GPR_NUM]; + u8 pkt_buf[ICE_PARSER_MAX_PKT_LEN + ICE_PARSER_PKT_REV]; + u16 pkt_len; + u16 po; + u8 bst_key[ICE_BST_KEY_SIZE]; + struct ice_pg_cam_key pg_key; + struct ice_alu *alu0; + struct ice_alu *alu1; + struct ice_alu *alu2; + struct ice_pg_cam_action *action; + u8 pg_prio; + struct ice_gpr_pu pu; + u8 markers[ICE_MARKER_ID_SIZE]; + bool protocols[ICE_PO_PAIR_SIZE]; + u16 offsets[ICE_PO_PAIR_SIZE]; +}; + +struct ice_parser_proto_off { + u8 proto_id; /* hardware protocol ID */ + u16 offset; /* offset from the start of the protocol header */ +}; + +#define ICE_PARSER_PROTO_OFF_PAIR_SIZE 16 +#define ICE_PARSER_FLAG_PSR_SIZE 8 +#define ICE_PARSER_FV_SIZE 48 +#define ICE_PARSER_FV_MAX 24 +#define ICE_BT_TUN_PORT_OFF_H 16 +#define ICE_BT_TUN_PORT_OFF_L 15 +#define ICE_BT_VM_OFF 0 +#define ICE_UDP_PORT_OFF_H 1 +#define ICE_UDP_PORT_OFF_L 0 + +struct ice_parser_result { + u16 ptype; /* 16 bits hardware PTYPE */ + /* array of protocol and header offset pairs */ + struct ice_parser_proto_off po[ICE_PARSER_PROTO_OFF_PAIR_SIZE]; + int po_num; /* # of protocol-offset pairs must <= 16 */ + u64 flags_psr; /* parser flags */ + u64 flags_pkt; /* packet flags */ + u16 flags_sw; /* key builder flags for SW */ + u16 flags_acl; /* key builder flags for 
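ice_flg_redirect(), declared above and defined elsewhere in this series, folds the 64 raw parser flags into the packet-flag word through the flag-redirection table: every exposed entry forwards one internal flag to the output bit at its own table index. A hedged standalone rendering with shortened names:

#include <stdbool.h>
#include <stdint.h>

struct flg_rd {
	bool    expose;
	uint8_t src;	/* internal parser-flag index */
};

static uint64_t flg_redirect(const struct flg_rd *tbl, int n, uint64_t psr)
{
	uint64_t out = 0;

	for (int i = 0; i < n; i++) {
		if (!tbl[i].expose)
			continue;
		if (psr & (1ULL << tbl[i].src))
			out |= 1ULL << i;	/* output bit = table index */
	}
	return out;
}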
ACL */ + u16 flags_fd; /* key builder flags for FD */ + u16 flags_rss; /* key builder flags for RSS */ +}; + +void ice_parser_rt_reset(struct ice_parser_rt *rt); +void ice_parser_rt_pktbuf_set(struct ice_parser_rt *rt, const u8 *pkt_buf, + int pkt_len); +int ice_parser_rt_execute(struct ice_parser_rt *rt, + struct ice_parser_result *rslt); + +struct ice_parser { + struct ice_hw *hw; /* pointer to the hardware structure */ + + struct ice_imem_item *imem_table; + struct ice_metainit_item *mi_table; + + struct ice_pg_cam_item *pg_cam_table; + struct ice_pg_cam_item *pg_sp_cam_table; + struct ice_pg_nm_cam_item *pg_nm_cam_table; + struct ice_pg_nm_cam_item *pg_nm_sp_cam_table; + + struct ice_bst_tcam_item *bst_tcam_table; + struct ice_lbl_item *bst_lbl_table; + struct ice_ptype_mk_tcam_item *ptype_mk_tcam_table; + struct ice_mk_grp_item *mk_grp_table; + struct ice_proto_grp_item *proto_grp_table; + struct ice_flg_rd_item *flg_rd_table; + + struct ice_xlt_kb *xlt_kb_sw; + struct ice_xlt_kb *xlt_kb_acl; + struct ice_xlt_kb *xlt_kb_fd; + struct ice_xlt_kb *xlt_kb_rss; + + struct ice_parser_rt rt; +}; + +struct ice_parser *ice_parser_create(struct ice_hw *hw); +void ice_parser_destroy(struct ice_parser *psr); +void ice_parser_dvm_set(struct ice_parser *psr, bool on); +int ice_parser_vxlan_tunnel_set(struct ice_parser *psr, u16 udp_port, bool on); +int ice_parser_geneve_tunnel_set(struct ice_parser *psr, u16 udp_port, bool on); +int ice_parser_ecpri_tunnel_set(struct ice_parser *psr, u16 udp_port, bool on); +int ice_parser_run(struct ice_parser *psr, const u8 *pkt_buf, + int pkt_len, struct ice_parser_result *rslt); +void ice_parser_result_dump(struct ice_hw *hw, struct ice_parser_result *rslt); + +struct ice_parser_fv { + u8 proto_id; /* hardware protocol ID */ + u16 offset; /* offset from the start of the protocol header */ + u16 spec; /* pattern to match */ + u16 msk; /* pattern mask */ +}; + +struct ice_parser_profile { + /* array of field vectors */ + struct ice_parser_fv fv[ICE_PARSER_FV_SIZE]; + int fv_num; /* # of field vectors must <= 48 */ + u16 flags; /* key builder flags */ + u16 flags_msk; /* key builder flag mask */ + + DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX); /* PTYPE bitmap */ +}; + +int ice_parser_profile_init(struct ice_parser_result *rslt, + const u8 *pkt_buf, const u8 *msk_buf, + int buf_len, enum ice_block blk, + struct ice_parser_profile *prof); +void ice_parser_profile_dump(struct ice_hw *hw, + struct ice_parser_profile *prof); +#endif /* _ICE_PARSER_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_parser_rt.c b/drivers/net/ethernet/intel/ice/ice_parser_rt.c new file mode 100644 index 000000000000..dedf5e854e4b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_parser_rt.c @@ -0,0 +1,861 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024 Intel Corporation */ + +#include "ice_common.h" + +static void ice_rt_tsr_set(struct ice_parser_rt *rt, u16 tsr) +{ + rt->gpr[ICE_GPR_TSR_IDX] = tsr; +} + +static void ice_rt_ho_set(struct ice_parser_rt *rt, u16 ho) +{ + rt->gpr[ICE_GPR_HO_IDX] = ho; + memcpy(&rt->gpr[ICE_GPR_HV_IDX], &rt->pkt_buf[ho], ICE_GPR_HV_SIZE); +} + +static void ice_rt_np_set(struct ice_parser_rt *rt, u16 pc) +{ + rt->gpr[ICE_GPR_NP_IDX] = pc; +} + +static void ice_rt_nn_set(struct ice_parser_rt *rt, u16 node) +{ + rt->gpr[ICE_GPR_NN_IDX] = node; +} + +static void +ice_rt_flag_set(struct ice_parser_rt *rt, unsigned int idx, bool set) +{ + struct ice_hw *hw = rt->psr->hw; + unsigned int word, id; + + word = idx / ICE_GPR_FLG_SIZE; + id = idx % 
ICE_GPR_FLG_SIZE; + + if (set) { + rt->gpr[ICE_GPR_FLG_IDX + word] |= (u16)BIT(id); + ice_debug(hw, ICE_DBG_PARSER, "Set parser flag %u\n", idx); + } else { + rt->gpr[ICE_GPR_FLG_IDX + word] &= ~(u16)BIT(id); + ice_debug(hw, ICE_DBG_PARSER, "Clear parser flag %u\n", idx); + } +} + +static void ice_rt_gpr_set(struct ice_parser_rt *rt, int idx, u16 val) +{ + struct ice_hw *hw = rt->psr->hw; + + if (idx == ICE_GPR_HO_IDX) + ice_rt_ho_set(rt, val); + else + rt->gpr[idx] = val; + + ice_debug(hw, ICE_DBG_PARSER, "Set GPR %d value %d\n", idx, val); +} + +static void ice_rt_err_set(struct ice_parser_rt *rt, unsigned int idx, bool set) +{ + struct ice_hw *hw = rt->psr->hw; + + if (set) { + rt->gpr[ICE_GPR_ERR_IDX] |= (u16)BIT(idx); + ice_debug(hw, ICE_DBG_PARSER, "Set parser error %u\n", idx); + } else { + rt->gpr[ICE_GPR_ERR_IDX] &= ~(u16)BIT(idx); + ice_debug(hw, ICE_DBG_PARSER, "Reset parser error %u\n", idx); + } +} + +/** + * ice_parser_rt_reset - reset the parser runtime + * @rt: pointer to the parser runtime + */ +void ice_parser_rt_reset(struct ice_parser_rt *rt) +{ + struct ice_parser *psr = rt->psr; + struct ice_metainit_item *mi; + unsigned int i; + + mi = &psr->mi_table[0]; + + memset(rt, 0, sizeof(*rt)); + rt->psr = psr; + + ice_rt_tsr_set(rt, mi->tsr); + ice_rt_ho_set(rt, mi->ho); + ice_rt_np_set(rt, mi->pc); + ice_rt_nn_set(rt, mi->pg_rn); + + for (i = 0; i < ICE_PARSER_FLG_NUM; i++) { + if (mi->flags & BIT(i)) + ice_rt_flag_set(rt, i, true); + } +} + +/** + * ice_parser_rt_pktbuf_set - set a packet into parser runtime + * @rt: pointer to the parser runtime + * @pkt_buf: buffer with packet data + * @pkt_len: packet buffer length + */ +void ice_parser_rt_pktbuf_set(struct ice_parser_rt *rt, const u8 *pkt_buf, + int pkt_len) +{ + int len = min(ICE_PARSER_MAX_PKT_LEN, pkt_len); + u16 ho = rt->gpr[ICE_GPR_HO_IDX]; + + memcpy(rt->pkt_buf, pkt_buf, len); + rt->pkt_len = pkt_len; + + memcpy(&rt->gpr[ICE_GPR_HV_IDX], &rt->pkt_buf[ho], ICE_GPR_HV_SIZE); +} + +static void ice_bst_key_init(struct ice_parser_rt *rt, + struct ice_imem_item *imem) +{ + u8 tsr = (u8)rt->gpr[ICE_GPR_TSR_IDX]; + u16 ho = rt->gpr[ICE_GPR_HO_IDX]; + u8 *key = rt->bst_key; + int idd, i; + + idd = ICE_BST_TCAM_KEY_SIZE - 1; + if (imem->b_kb.tsr_ctrl) + key[idd] = tsr; + else + key[idd] = imem->b_kb.prio; + + idd = ICE_BST_KEY_TCAM_SIZE - 1; + for (i = idd; i >= 0; i--) { + int j; + + j = ho + idd - i; + if (j < ICE_PARSER_MAX_PKT_LEN) + key[i] = rt->pkt_buf[ho + idd - i]; + else + key[i] = 0; + } + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Generated Boost TCAM Key:\n"); + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "%02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n", + key[0], key[1], key[2], key[3], key[4], + key[5], key[6], key[7], key[8], key[9]); + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "\n"); +} + +static u16 ice_bit_rev_u16(u16 v, int len) +{ + return bitrev16(v) >> (BITS_PER_TYPE(v) - len); +} + +static u32 ice_bit_rev_u32(u32 v, int len) +{ + return bitrev32(v) >> (BITS_PER_TYPE(v) - len); +} + +static u32 ice_hv_bit_sel(struct ice_parser_rt *rt, int start, int len) +{ + int offset; + u32 buf[2]; + u64 val; + + offset = ICE_GPR_HV_IDX + (start / BITS_PER_TYPE(u16)); + + memcpy(buf, &rt->gpr[offset], sizeof(buf)); + + buf[0] = bitrev8x4(buf[0]); + buf[1] = bitrev8x4(buf[1]); + + val = *(u64 *)buf; + val >>= start % BITS_PER_TYPE(u16); + + return ice_bit_rev_u32(val, len); +} + +static u32 ice_pk_build(struct ice_parser_rt *rt, + struct ice_np_keybuilder *kb) +{ + if (kb->opc == ICE_NPKB_OPC_EXTRACT) + return 
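The ice_bit_rev_u16/u32 helpers above extract little-endian bit fields from byte-reversed words: reverse the whole word with bitrev16()/bitrev32(), then shift the field back down by the unused width. The same operation written out longhand, for readers without linux/bitrev.h at hand:

#include <stdint.h>

static uint32_t bit_rev_u32(uint32_t v, int len)
{
	uint32_t r = 0;

	for (int i = 0; i < 32; i++)	/* full 32-bit reversal */
		if (v & (1u << i))
			r |= 1u << (31 - i);

	return r >> (32 - len);		/* keep only the low @len bits */
}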
ice_hv_bit_sel(rt, kb->start_reg0, kb->len_reg1); + else if (kb->opc == ICE_NPKB_OPC_BUILD) + return rt->gpr[kb->start_reg0] | + ((u32)rt->gpr[kb->len_reg1] << BITS_PER_TYPE(u16)); + else if (kb->opc == ICE_NPKB_OPC_BYPASS) + return 0; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Unsupported OP Code %u\n", + kb->opc); + return U32_MAX; +} + +static bool ice_flag_get(struct ice_parser_rt *rt, unsigned int index) +{ + int word = index / ICE_GPR_FLG_SIZE; + int id = index % ICE_GPR_FLG_SIZE; + + return !!(rt->gpr[ICE_GPR_FLG_IDX + word] & (u16)BIT(id)); +} + +static int ice_imem_pgk_init(struct ice_parser_rt *rt, + struct ice_imem_item *imem) +{ + memset(&rt->pg_key, 0, sizeof(rt->pg_key)); + rt->pg_key.next_proto = ice_pk_build(rt, &imem->np_kb); + if (rt->pg_key.next_proto == U32_MAX) + return -EINVAL; + + if (imem->pg_kb.flag0_ena) + rt->pg_key.flag0 = ice_flag_get(rt, imem->pg_kb.flag0_idx); + if (imem->pg_kb.flag1_ena) + rt->pg_key.flag1 = ice_flag_get(rt, imem->pg_kb.flag1_idx); + if (imem->pg_kb.flag2_ena) + rt->pg_key.flag2 = ice_flag_get(rt, imem->pg_kb.flag2_idx); + if (imem->pg_kb.flag3_ena) + rt->pg_key.flag3 = ice_flag_get(rt, imem->pg_kb.flag3_idx); + + rt->pg_key.alu_reg = rt->gpr[imem->pg_kb.alu_reg_idx]; + rt->pg_key.node_id = rt->gpr[ICE_GPR_NN_IDX]; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Generate Parse Graph Key: node_id(%d), flag0-3(%d,%d,%d,%d), boost_idx(%d), alu_reg(0x%04x), next_proto(0x%08x)\n", + rt->pg_key.node_id, + rt->pg_key.flag0, + rt->pg_key.flag1, + rt->pg_key.flag2, + rt->pg_key.flag3, + rt->pg_key.boost_idx, + rt->pg_key.alu_reg, + rt->pg_key.next_proto); + + return 0; +} + +static void ice_imem_alu0_set(struct ice_parser_rt *rt, + struct ice_imem_item *imem) +{ + rt->alu0 = &imem->alu0; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load ALU0 from imem pc %d\n", + imem->idx); +} + +static void ice_imem_alu1_set(struct ice_parser_rt *rt, + struct ice_imem_item *imem) +{ + rt->alu1 = &imem->alu1; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load ALU1 from imem pc %d\n", + imem->idx); +} + +static void ice_imem_alu2_set(struct ice_parser_rt *rt, + struct ice_imem_item *imem) +{ + rt->alu2 = &imem->alu2; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load ALU2 from imem pc %d\n", + imem->idx); +} + +static void ice_imem_pgp_set(struct ice_parser_rt *rt, + struct ice_imem_item *imem) +{ + rt->pg_prio = imem->pg_prio; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load PG priority %d from imem pc %d\n", + rt->pg_prio, imem->idx); +} + +static int ice_bst_pgk_init(struct ice_parser_rt *rt, + struct ice_bst_tcam_item *bst) +{ + memset(&rt->pg_key, 0, sizeof(rt->pg_key)); + rt->pg_key.boost_idx = bst->hit_idx_grp; + rt->pg_key.next_proto = ice_pk_build(rt, &bst->np_kb); + if (rt->pg_key.next_proto == U32_MAX) + return -EINVAL; + + if (bst->pg_kb.flag0_ena) + rt->pg_key.flag0 = ice_flag_get(rt, bst->pg_kb.flag0_idx); + if (bst->pg_kb.flag1_ena) + rt->pg_key.flag1 = ice_flag_get(rt, bst->pg_kb.flag1_idx); + if (bst->pg_kb.flag2_ena) + rt->pg_key.flag2 = ice_flag_get(rt, bst->pg_kb.flag2_idx); + if (bst->pg_kb.flag3_ena) + rt->pg_key.flag3 = ice_flag_get(rt, bst->pg_kb.flag3_idx); + + rt->pg_key.alu_reg = rt->gpr[bst->pg_kb.alu_reg_idx]; + rt->pg_key.node_id = rt->gpr[ICE_GPR_NN_IDX]; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Generate Parse Graph Key: node_id(%d), flag0-3(%d,%d,%d,%d), boost_idx(%d), alu_reg(0x%04x), next_proto(0x%08x)\n", + rt->pg_key.node_id, + rt->pg_key.flag0, + rt->pg_key.flag1, + rt->pg_key.flag2, + rt->pg_key.flag3, + rt->pg_key.boost_idx, + 
rt->pg_key.alu_reg, + rt->pg_key.next_proto); + + return 0; +} + +static void ice_bst_alu0_set(struct ice_parser_rt *rt, + struct ice_bst_tcam_item *bst) +{ + rt->alu0 = &bst->alu0; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load ALU0 from boost address %d\n", + bst->addr); +} + +static void ice_bst_alu1_set(struct ice_parser_rt *rt, + struct ice_bst_tcam_item *bst) +{ + rt->alu1 = &bst->alu1; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load ALU1 from boost address %d\n", + bst->addr); +} + +static void ice_bst_alu2_set(struct ice_parser_rt *rt, + struct ice_bst_tcam_item *bst) +{ + rt->alu2 = &bst->alu2; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load ALU2 from boost address %d\n", + bst->addr); +} + +static void ice_bst_pgp_set(struct ice_parser_rt *rt, + struct ice_bst_tcam_item *bst) +{ + rt->pg_prio = bst->pg_prio; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load PG priority %d from boost address %d\n", + rt->pg_prio, bst->addr); +} + +static struct ice_pg_cam_item *ice_rt_pg_cam_match(struct ice_parser_rt *rt) +{ + struct ice_parser *psr = rt->psr; + struct ice_pg_cam_item *item; + + item = ice_pg_cam_match(psr->pg_cam_table, ICE_PG_CAM_TABLE_SIZE, + &rt->pg_key); + if (!item) + item = ice_pg_cam_match(psr->pg_sp_cam_table, + ICE_PG_SP_CAM_TABLE_SIZE, &rt->pg_key); + return item; +} + +static +struct ice_pg_nm_cam_item *ice_rt_pg_nm_cam_match(struct ice_parser_rt *rt) +{ + struct ice_parser *psr = rt->psr; + struct ice_pg_nm_cam_item *item; + + item = ice_pg_nm_cam_match(psr->pg_nm_cam_table, + ICE_PG_NM_CAM_TABLE_SIZE, &rt->pg_key); + + if (!item) + item = ice_pg_nm_cam_match(psr->pg_nm_sp_cam_table, + ICE_PG_NM_SP_CAM_TABLE_SIZE, + &rt->pg_key); + return item; +} + +static void ice_gpr_add(struct ice_parser_rt *rt, int idx, u16 val) +{ + rt->pu.gpr_val_upd[idx] = true; + rt->pu.gpr_val[idx] = val; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Pending update for register %d value %d\n", + idx, val); +} + +static void ice_pg_exe(struct ice_parser_rt *rt) +{ + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Executing ParseGraph action ...\n"); + + ice_gpr_add(rt, ICE_GPR_NP_IDX, rt->action->next_pc); + ice_gpr_add(rt, ICE_GPR_NN_IDX, rt->action->next_node); + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Executing ParseGraph action done.\n"); +} + +static void ice_flg_add(struct ice_parser_rt *rt, int idx, bool val) +{ + rt->pu.flg_msk |= BIT_ULL(idx); + if (val) + rt->pu.flg_val |= BIT_ULL(idx); + else + rt->pu.flg_val &= ~BIT_ULL(idx); + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Pending update for flag %d value %d\n", + idx, val); +} + +static void ice_flg_update(struct ice_parser_rt *rt, struct ice_alu *alu) +{ + u32 hv_bit_sel; + int i; + + if (!alu->dedicate_flags_ena) + return; + + if (alu->flags_extr_imm) { + for (i = 0; i < alu->dst_len; i++) + ice_flg_add(rt, alu->dst_start + i, + !!(alu->flags_start_imm & BIT(i))); + } else { + for (i = 0; i < alu->dst_len; i++) { + hv_bit_sel = ice_hv_bit_sel(rt, + alu->flags_start_imm + i, + 1); + ice_flg_add(rt, alu->dst_start + i, !!hv_bit_sel); + } + } +} + +static void ice_po_update(struct ice_parser_rt *rt, struct ice_alu *alu) +{ + if (alu->proto_offset_opc == ICE_PO_OFF_HDR_ADD) + rt->po = (u16)(rt->gpr[ICE_GPR_HO_IDX] + alu->proto_offset); + else if (alu->proto_offset_opc == ICE_PO_OFF_HDR_SUB) + rt->po = (u16)(rt->gpr[ICE_GPR_HO_IDX] - alu->proto_offset); + else if (alu->proto_offset_opc == ICE_PO_OFF_REMAIN) + rt->po = rt->gpr[ICE_GPR_HO_IDX]; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Update Protocol Offset = %d\n", + rt->po); +} + +static u16 
ice_reg_bit_sel(struct ice_parser_rt *rt, int reg_idx, + int start, int len) +{ + int offset; + u32 val; + + offset = ICE_GPR_HV_IDX + (start / BITS_PER_TYPE(u16)); + + memcpy(&val, &rt->gpr[offset], sizeof(val)); + + val = bitrev8x4(val); + val >>= start % BITS_PER_TYPE(u16); + + return ice_bit_rev_u16(val, len); +} + +static void ice_err_add(struct ice_parser_rt *rt, int idx, bool val) +{ + rt->pu.err_msk |= (u16)BIT(idx); + if (val) + rt->pu.flg_val |= (u64)BIT_ULL(idx); + else + rt->pu.flg_val &= ~(u64)BIT_ULL(idx); + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Pending update for error %d value %d\n", + idx, val); +} + +static void ice_dst_reg_bit_set(struct ice_parser_rt *rt, struct ice_alu *alu, + bool val) +{ + u16 flg_idx; + + if (alu->dedicate_flags_ena) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "DedicatedFlagsEnable should not be enabled in opcode %d\n", + alu->opc); + return; + } + + if (alu->dst_reg_id == ICE_GPR_ERR_IDX) { + if (alu->dst_start >= ICE_PARSER_ERR_NUM) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Invalid error %d\n", + alu->dst_start); + return; + } + ice_err_add(rt, alu->dst_start, val); + } else if (alu->dst_reg_id >= ICE_GPR_FLG_IDX) { + flg_idx = (u16)(((alu->dst_reg_id - ICE_GPR_FLG_IDX) << 4) + + alu->dst_start); + + if (flg_idx >= ICE_PARSER_FLG_NUM) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Invalid flag %d\n", + flg_idx); + return; + } + ice_flg_add(rt, flg_idx, val); + } else { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Unexpected Dest Register Bit set, RegisterID %d Start %d\n", + alu->dst_reg_id, alu->dst_start); + } +} + +static void ice_alu_exe(struct ice_parser_rt *rt, struct ice_alu *alu) +{ + u16 dst, src, shift, imm; + + if (alu->shift_xlate_sel) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "shift_xlate_sel != 0 is not expected\n"); + return; + } + + ice_po_update(rt, alu); + ice_flg_update(rt, alu); + + dst = rt->gpr[alu->dst_reg_id]; + src = ice_reg_bit_sel(rt, alu->src_reg_id, + alu->src_start, alu->src_len); + shift = alu->shift_xlate_key; + imm = alu->imm; + + switch (alu->opc) { + case ICE_ALU_PARK: + break; + case ICE_ALU_MOV_ADD: + dst = (src << shift) + imm; + ice_gpr_add(rt, alu->dst_reg_id, dst); + break; + case ICE_ALU_ADD: + dst += (src << shift) + imm; + ice_gpr_add(rt, alu->dst_reg_id, dst); + break; + case ICE_ALU_ORLT: + if (src < imm) + ice_dst_reg_bit_set(rt, alu, true); + ice_gpr_add(rt, ICE_GPR_NP_IDX, alu->branch_addr); + break; + case ICE_ALU_OREQ: + if (src == imm) + ice_dst_reg_bit_set(rt, alu, true); + ice_gpr_add(rt, ICE_GPR_NP_IDX, alu->branch_addr); + break; + case ICE_ALU_SETEQ: + ice_dst_reg_bit_set(rt, alu, src == imm); + ice_gpr_add(rt, ICE_GPR_NP_IDX, alu->branch_addr); + break; + case ICE_ALU_MOV_XOR: + dst = (src << shift) ^ imm; + ice_gpr_add(rt, alu->dst_reg_id, dst); + break; + default: + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Unsupported ALU instruction %d\n", + alu->opc); + break; + } +} + +static void ice_alu0_exe(struct ice_parser_rt *rt) +{ + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Executing ALU0 ...\n"); + ice_alu_exe(rt, rt->alu0); + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Executing ALU0 done.\n"); +} + +static void ice_alu1_exe(struct ice_parser_rt *rt) +{ + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Executing ALU1 ...\n"); + ice_alu_exe(rt, rt->alu1); + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Executing ALU1 done.\n"); +} + +static void ice_alu2_exe(struct ice_parser_rt *rt) +{ + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Executing ALU2 ...\n"); + ice_alu_exe(rt, rt->alu2); + ice_debug(rt->psr->hw, 
ICE_DBG_PARSER, "Executing ALU2 done.\n"); +} + +static void ice_pu_exe(struct ice_parser_rt *rt) +{ + struct ice_gpr_pu *pu = &rt->pu; + unsigned int i; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Updating Registers ...\n"); + + for (i = 0; i < ICE_PARSER_GPR_NUM; i++) { + if (pu->gpr_val_upd[i]) + ice_rt_gpr_set(rt, i, pu->gpr_val[i]); + } + + for (i = 0; i < ICE_PARSER_FLG_NUM; i++) { + if (pu->flg_msk & BIT(i)) + ice_rt_flag_set(rt, i, pu->flg_val & BIT(i)); + } + + for (i = 0; i < ICE_PARSER_ERR_NUM; i++) { + if (pu->err_msk & BIT(i)) + ice_rt_err_set(rt, i, pu->err_val & BIT(i)); + } + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Updating Registers done.\n"); +} + +static void ice_alu_pg_exe(struct ice_parser_rt *rt) +{ + memset(&rt->pu, 0, sizeof(rt->pu)); + + switch (rt->pg_prio) { + case (ICE_PG_P0): + ice_pg_exe(rt); + ice_alu0_exe(rt); + ice_alu1_exe(rt); + ice_alu2_exe(rt); + break; + case (ICE_PG_P1): + ice_alu0_exe(rt); + ice_pg_exe(rt); + ice_alu1_exe(rt); + ice_alu2_exe(rt); + break; + case (ICE_PG_P2): + ice_alu0_exe(rt); + ice_alu1_exe(rt); + ice_pg_exe(rt); + ice_alu2_exe(rt); + break; + case (ICE_PG_P3): + ice_alu0_exe(rt); + ice_alu1_exe(rt); + ice_alu2_exe(rt); + ice_pg_exe(rt); + break; + } + + ice_pu_exe(rt); + + if (rt->action->ho_inc == 0) + return; + + if (rt->action->ho_polarity) + ice_rt_ho_set(rt, rt->gpr[ICE_GPR_HO_IDX] + rt->action->ho_inc); + else + ice_rt_ho_set(rt, rt->gpr[ICE_GPR_HO_IDX] - rt->action->ho_inc); +} + +static void ice_proto_off_update(struct ice_parser_rt *rt) +{ + struct ice_parser *psr = rt->psr; + + if (rt->action->is_pg) { + struct ice_proto_grp_item *proto_grp = + &psr->proto_grp_table[rt->action->proto_id]; + u16 po; + int i; + + for (i = 0; i < ICE_PROTO_COUNT_PER_GRP; i++) { + struct ice_proto_off *entry = &proto_grp->po[i]; + + if (entry->proto_id == U8_MAX) + break; + + if (!entry->polarity) + po = rt->po + entry->offset; + else + po = rt->po - entry->offset; + + rt->protocols[entry->proto_id] = true; + rt->offsets[entry->proto_id] = po; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Set Protocol %d at offset %d\n", + entry->proto_id, po); + } + } else { + rt->protocols[rt->action->proto_id] = true; + rt->offsets[rt->action->proto_id] = rt->po; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Set Protocol %d at offset %d\n", + rt->action->proto_id, rt->po); + } +} + +static void ice_marker_set(struct ice_parser_rt *rt, int idx) +{ + unsigned int byte = idx / BITS_PER_BYTE; + unsigned int bit = idx % BITS_PER_BYTE; + + rt->markers[byte] |= (u8)BIT(bit); +} + +static void ice_marker_update(struct ice_parser_rt *rt) +{ + struct ice_parser *psr = rt->psr; + + if (rt->action->is_mg) { + struct ice_mk_grp_item *mk_grp = + &psr->mk_grp_table[rt->action->marker_id]; + int i; + + for (i = 0; i < ICE_MARKER_ID_NUM; i++) { + u8 marker = mk_grp->markers[i]; + + if (marker == ICE_MARKER_MAX_SIZE) + break; + + ice_marker_set(rt, marker); + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Set Marker %d\n", + marker); + } + } else { + if (rt->action->marker_id != ICE_MARKER_MAX_SIZE) + ice_marker_set(rt, rt->action->marker_id); + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Set Marker %d\n", + rt->action->marker_id); + } +} + +static u16 ice_ptype_resolve(struct ice_parser_rt *rt) +{ + struct ice_ptype_mk_tcam_item *item; + struct ice_parser *psr = rt->psr; + + item = ice_ptype_mk_tcam_match(psr->ptype_mk_tcam_table, + rt->markers, ICE_MARKER_ID_SIZE); + if (item) + return item->ptype; + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Could not resolve PTYPE\n"); + return 
U16_MAX; +} + +static void ice_proto_off_resolve(struct ice_parser_rt *rt, + struct ice_parser_result *rslt) +{ + int i; + + for (i = 0; i < ICE_PO_PAIR_SIZE - 1; i++) { + if (rt->protocols[i]) { + rslt->po[rslt->po_num].proto_id = (u8)i; + rslt->po[rslt->po_num].offset = rt->offsets[i]; + rslt->po_num++; + } + } +} + +static void ice_result_resolve(struct ice_parser_rt *rt, + struct ice_parser_result *rslt) +{ + struct ice_parser *psr = rt->psr; + + memset(rslt, 0, sizeof(*rslt)); + + memcpy(&rslt->flags_psr, &rt->gpr[ICE_GPR_FLG_IDX], + ICE_PARSER_FLAG_PSR_SIZE); + rslt->flags_pkt = ice_flg_redirect(psr->flg_rd_table, rslt->flags_psr); + rslt->flags_sw = ice_xlt_kb_flag_get(psr->xlt_kb_sw, rslt->flags_pkt); + rslt->flags_fd = ice_xlt_kb_flag_get(psr->xlt_kb_fd, rslt->flags_pkt); + rslt->flags_rss = ice_xlt_kb_flag_get(psr->xlt_kb_rss, rslt->flags_pkt); + + ice_proto_off_resolve(rt, rslt); + rslt->ptype = ice_ptype_resolve(rt); +} + +/** + * ice_parser_rt_execute - parser execution routine + * @rt: pointer to the parser runtime + * @rslt: input/output parameter to save parser result + * + * Return: 0 on success or errno. + */ +int ice_parser_rt_execute(struct ice_parser_rt *rt, + struct ice_parser_result *rslt) +{ + struct ice_pg_nm_cam_item *pg_nm_cam; + struct ice_parser *psr = rt->psr; + struct ice_pg_cam_item *pg_cam; + int status = 0; + u16 node; + u16 pc; + + node = rt->gpr[ICE_GPR_NN_IDX]; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Start with Node: %u\n", node); + + while (true) { + struct ice_bst_tcam_item *bst; + struct ice_imem_item *imem; + + pc = rt->gpr[ICE_GPR_NP_IDX]; + imem = &psr->imem_table[pc]; + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Load imem at pc: %u\n", + pc); + + ice_bst_key_init(rt, imem); + bst = ice_bst_tcam_match(psr->bst_tcam_table, rt->bst_key); + if (!bst) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "No Boost TCAM Match\n"); + status = ice_imem_pgk_init(rt, imem); + if (status) + break; + ice_imem_alu0_set(rt, imem); + ice_imem_alu1_set(rt, imem); + ice_imem_alu2_set(rt, imem); + ice_imem_pgp_set(rt, imem); + } else { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Boost TCAM Match address: %u\n", + bst->addr); + if (imem->b_m.pg) { + status = ice_bst_pgk_init(rt, bst); + if (status) + break; + ice_bst_pgp_set(rt, bst); + } else { + status = ice_imem_pgk_init(rt, imem); + if (status) + break; + ice_imem_pgp_set(rt, imem); + } + + if (imem->b_m.alu0) + ice_bst_alu0_set(rt, bst); + else + ice_imem_alu0_set(rt, imem); + + if (imem->b_m.alu1) + ice_bst_alu1_set(rt, bst); + else + ice_imem_alu1_set(rt, imem); + + if (imem->b_m.alu2) + ice_bst_alu2_set(rt, bst); + else + ice_imem_alu2_set(rt, imem); + } + + rt->action = NULL; + pg_cam = ice_rt_pg_cam_match(rt); + if (!pg_cam) { + pg_nm_cam = ice_rt_pg_nm_cam_match(rt); + if (pg_nm_cam) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Match ParseGraph Nomatch CAM Address %u\n", + pg_nm_cam->idx); + rt->action = &pg_nm_cam->action; + } + } else { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Match ParseGraph CAM Address %u\n", + pg_cam->idx); + rt->action = &pg_cam->action; + } + + if (!rt->action) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Failed to match ParseGraph CAM, stop parsing.\n"); + status = -EINVAL; + break; + } + + ice_alu_pg_exe(rt); + ice_marker_update(rt); + ice_proto_off_update(rt); + + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Go to node %u\n", + rt->action->next_node); + + if (rt->action->is_last_round) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Last Round in ParseGraph Action, stop parsing.\n"); + break; + } + + 
if (rt->gpr[ICE_GPR_HO_IDX] >= rt->pkt_len) { + ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Header Offset (%u) is larger than packet len (%u), stop parsing\n", + rt->gpr[ICE_GPR_HO_IDX], rt->pkt_len); + break; + } + } + + ice_result_resolve(rt, rslt); + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e2786cc13286..ef2e858f49bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1477,6 +1477,10 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; + /* Skip HW writes if reset is in progress */ + if (pf->hw.reset_ongoing) + return; + switch (hw->ptp.phy_model) { case ICE_PHY_E810: /* Do not reconfigure E810 PHY */ diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index bdda3401e343..970a99a52bf1 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -59,12 +59,13 @@ static void ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_repr *repr = np->repr; struct ice_eth_stats *eth_stats; struct ice_vsi *vsi; - if (ice_is_vf_disabled(np->repr->vf)) + if (repr->ops.ready(repr)) return; - vsi = np->repr->src_vsi; + vsi = repr->src_vsi; ice_update_vsi_stats(vsi); eth_stats = &vsi->eth_stats; @@ -93,7 +94,7 @@ struct ice_repr *ice_netdev_to_repr(const struct net_device *netdev) } /** - * ice_repr_open - Enable port representor's network interface + * ice_repr_vf_open - Enable port representor's network interface * @netdev: network interface device structure * * The open entry point is called when a port representor's network @@ -102,7 +103,7 @@ struct ice_repr *ice_netdev_to_repr(const struct net_device *netdev) * * Returns 0 on success */ -static int ice_repr_open(struct net_device *netdev) +static int ice_repr_vf_open(struct net_device *netdev) { struct ice_repr *repr = ice_netdev_to_repr(netdev); struct ice_vf *vf; @@ -118,8 +119,16 @@ static int ice_repr_open(struct net_device *netdev) return 0; } +static int ice_repr_sf_open(struct net_device *netdev) +{ + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + + return 0; +} + /** - * ice_repr_stop - Disable port representor's network interface + * ice_repr_vf_stop - Disable port representor's network interface * @netdev: network interface device structure * * The stop entry point is called when a port representor's network @@ -128,7 +137,7 @@ static int ice_repr_open(struct net_device *netdev) * * Returns 0 on success */ -static int ice_repr_stop(struct net_device *netdev) +static int ice_repr_vf_stop(struct net_device *netdev) { struct ice_repr *repr = ice_netdev_to_repr(netdev); struct ice_vf *vf; @@ -144,6 +153,14 @@ static int ice_repr_stop(struct net_device *netdev) return 0; } +static int ice_repr_sf_stop(struct net_device *netdev) +{ + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + return 0; +} + /** * ice_repr_sp_stats64 - get slow path stats for port representor * @dev: network interface device structure @@ -245,10 +262,20 @@ ice_repr_setup_tc(struct net_device *netdev, enum tc_setup_type type, } } -static const struct net_device_ops ice_repr_netdev_ops = { +static const struct net_device_ops ice_repr_vf_netdev_ops = { .ndo_get_stats64 = ice_repr_get_stats64, - .ndo_open = ice_repr_open, - 
.ndo_stop = ice_repr_stop, + .ndo_open = ice_repr_vf_open, + .ndo_stop = ice_repr_vf_stop, + .ndo_start_xmit = ice_eswitch_port_start_xmit, + .ndo_setup_tc = ice_repr_setup_tc, + .ndo_has_offload_stats = ice_repr_ndo_has_offload_stats, + .ndo_get_offload_stats = ice_repr_ndo_get_offload_stats, +}; + +static const struct net_device_ops ice_repr_sf_netdev_ops = { + .ndo_get_stats64 = ice_repr_get_stats64, + .ndo_open = ice_repr_sf_open, + .ndo_stop = ice_repr_sf_stop, .ndo_start_xmit = ice_eswitch_port_start_xmit, .ndo_setup_tc = ice_repr_setup_tc, .ndo_has_offload_stats = ice_repr_ndo_has_offload_stats, @@ -261,18 +288,20 @@ static const struct net_device_ops ice_repr_netdev_ops = { */ bool ice_is_port_repr_netdev(const struct net_device *netdev) { - return netdev && (netdev->netdev_ops == &ice_repr_netdev_ops); + return netdev && (netdev->netdev_ops == &ice_repr_vf_netdev_ops || + netdev->netdev_ops == &ice_repr_sf_netdev_ops); } /** * ice_repr_reg_netdev - register port representor netdev * @netdev: pointer to port representor netdev + * @ops: new ops for netdev */ static int -ice_repr_reg_netdev(struct net_device *netdev) +ice_repr_reg_netdev(struct net_device *netdev, const struct net_device_ops *ops) { eth_hw_addr_random(netdev); - netdev->netdev_ops = &ice_repr_netdev_ops; + netdev->netdev_ops = ops; ice_set_ethtool_repr_ops(netdev); netdev->hw_features |= NETIF_F_HW_TC; @@ -283,57 +312,56 @@ ice_repr_reg_netdev(struct net_device *netdev) return register_netdev(netdev); } -static void ice_repr_remove_node(struct devlink_port *devlink_port) +static int ice_repr_ready_vf(struct ice_repr *repr) +{ + return !ice_check_vf_ready_for_cfg(repr->vf); +} + +static int ice_repr_ready_sf(struct ice_repr *repr) { - devl_rate_leaf_destroy(devlink_port); + return !repr->sf->active; } /** - * ice_repr_rem - remove representor from VF + * ice_repr_destroy - remove representor from VF * @repr: pointer to representor structure */ -static void ice_repr_rem(struct ice_repr *repr) +void ice_repr_destroy(struct ice_repr *repr) { free_percpu(repr->stats); free_netdev(repr->netdev); kfree(repr); } -/** - * ice_repr_rem_vf - remove representor from VF - * @repr: pointer to representor structure - */ -void ice_repr_rem_vf(struct ice_repr *repr) +static void ice_repr_rem_vf(struct ice_repr *repr) { - ice_repr_remove_node(&repr->vf->devlink_port); ice_eswitch_decfg_vsi(repr->src_vsi, repr->parent_mac); unregister_netdev(repr->netdev); ice_devlink_destroy_vf_port(repr->vf); ice_virtchnl_set_dflt_ops(repr->vf); - ice_repr_rem(repr); } -static void ice_repr_set_tx_topology(struct ice_pf *pf) +static void ice_repr_rem_sf(struct ice_repr *repr) { - struct devlink *devlink; + unregister_netdev(repr->netdev); + ice_devlink_destroy_sf_port(repr->sf); +} +static void ice_repr_set_tx_topology(struct ice_pf *pf, struct devlink *devlink) +{ /* only export if ADQ and DCB disabled and eswitch enabled*/ if (ice_is_adq_active(pf) || ice_is_dcb_active(pf) || !ice_is_switchdev_running(pf)) return; - devlink = priv_to_devlink(pf); ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf)); } /** - * ice_repr_add - add representor for generic VSI - * @pf: pointer to PF structure + * ice_repr_create - add representor for generic VSI * @src_vsi: pointer to VSI structure of device to represent - * @parent_mac: device MAC address */ -static struct ice_repr * -ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) +static struct ice_repr *ice_repr_create(struct ice_vsi *src_vsi) { struct ice_netdev_priv *np; 
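The VF/SF split here replaces direct calls with a small per-representor ops table (add/rem/ready), so shared paths such as ice_repr_get_stats64() no longer care which kind of port they service. A sketch of how callers can drive it; ice_repr_attach() and ice_repr_detach() are illustrative names, not functions from this patch:

/* illustrative generic lifecycle built on the per-type ops */
static int ice_repr_attach(struct ice_repr *repr)
{
	return repr->ops.add(repr);	/* ice_repr_add_vf() or _sf() */
}

static void ice_repr_detach(struct ice_repr *repr)
{
	repr->ops.rem(repr);		/* ice_repr_rem_vf() or _sf() */
	ice_repr_destroy(repr);		/* common stats/netdev teardown */
}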
struct ice_repr *repr; @@ -360,7 +388,10 @@ ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) np = netdev_priv(repr->netdev); np->repr = repr; - ether_addr_copy(repr->parent_mac, parent_mac); + repr->netdev->min_mtu = ETH_MIN_MTU; + repr->netdev->max_mtu = ICE_MAX_MTU; + + SET_NETDEV_DEV(repr->netdev, ice_pf_to_dev(src_vsi->back)); return repr; @@ -371,34 +402,18 @@ err_alloc: return ERR_PTR(err); } -struct ice_repr *ice_repr_add_vf(struct ice_vf *vf) +static int ice_repr_add_vf(struct ice_repr *repr) { - struct ice_repr *repr; - struct ice_vsi *vsi; + struct ice_vf *vf = repr->vf; + struct devlink *devlink; int err; - vsi = ice_get_vf_vsi(vf); - if (!vsi) - return ERR_PTR(-ENOENT); - err = ice_devlink_create_vf_port(vf); if (err) - return ERR_PTR(err); - - repr = ice_repr_add(vf->pf, vsi, vf->hw_lan_addr); - if (IS_ERR(repr)) { - err = PTR_ERR(repr); - goto err_repr_add; - } - - repr->vf = vf; - - repr->netdev->min_mtu = ETH_MIN_MTU; - repr->netdev->max_mtu = ICE_MAX_MTU; + return err; - SET_NETDEV_DEV(repr->netdev, ice_pf_to_dev(vf->pf)); SET_NETDEV_DEVLINK_PORT(repr->netdev, &vf->devlink_port); - err = ice_repr_reg_netdev(repr->netdev); + err = ice_repr_reg_netdev(repr->netdev, &ice_repr_vf_netdev_ops); if (err) goto err_netdev; @@ -407,17 +422,97 @@ struct ice_repr *ice_repr_add_vf(struct ice_vf *vf) goto err_cfg_vsi; ice_virtchnl_set_repr_ops(vf); - ice_repr_set_tx_topology(vf->pf); - return repr; + devlink = priv_to_devlink(vf->pf); + ice_repr_set_tx_topology(vf->pf, devlink); + + return 0; err_cfg_vsi: unregister_netdev(repr->netdev); err_netdev: - ice_repr_rem(repr); -err_repr_add: ice_devlink_destroy_vf_port(vf); - return ERR_PTR(err); + return err; +} + +/** + * ice_repr_create_vf - add representor for VF VSI + * @vf: VF to create port representor on + * + * Set correct representor type for VF and functions pointer. + * + * Return: created port representor on success, error otherwise + */ +struct ice_repr *ice_repr_create_vf(struct ice_vf *vf) +{ + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_repr *repr; + + if (!vsi) + return ERR_PTR(-EINVAL); + + repr = ice_repr_create(vsi); + if (IS_ERR(repr)) + return repr; + + repr->type = ICE_REPR_TYPE_VF; + repr->vf = vf; + repr->ops.add = ice_repr_add_vf; + repr->ops.rem = ice_repr_rem_vf; + repr->ops.ready = ice_repr_ready_vf; + + ether_addr_copy(repr->parent_mac, vf->hw_lan_addr); + + return repr; +} + +static int ice_repr_add_sf(struct ice_repr *repr) +{ + struct ice_dynamic_port *sf = repr->sf; + int err; + + err = ice_devlink_create_sf_port(sf); + if (err) + return err; + + SET_NETDEV_DEVLINK_PORT(repr->netdev, &sf->devlink_port); + err = ice_repr_reg_netdev(repr->netdev, &ice_repr_sf_netdev_ops); + if (err) + goto err_netdev; + + ice_repr_set_tx_topology(sf->vsi->back, priv_to_devlink(sf->vsi->back)); + + return 0; + +err_netdev: + ice_devlink_destroy_sf_port(sf); + return err; +} + +/** + * ice_repr_create_sf - add representor for SF VSI + * @sf: SF to create port representor on + * + * Set correct representor type for SF and functions pointer. 
+ * + * Return: created port representor on success, error otherwise + */ +struct ice_repr *ice_repr_create_sf(struct ice_dynamic_port *sf) +{ + struct ice_repr *repr = ice_repr_create(sf->vsi); + + if (IS_ERR(repr)) + return repr; + + repr->type = ICE_REPR_TYPE_SF; + repr->sf = sf; + repr->ops.add = ice_repr_add_sf; + repr->ops.rem = ice_repr_rem_sf; + repr->ops.ready = ice_repr_ready_sf; + + ether_addr_copy(repr->parent_mac, sf->hw_addr); + + return repr; } struct ice_repr *ice_repr_get(struct ice_pf *pf, u32 id) diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index 488661b2900b..35bd93165e1e 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -15,19 +15,35 @@ struct ice_repr_pcpu_stats { u64 tx_drops; }; +enum ice_repr_type { + ICE_REPR_TYPE_VF, + ICE_REPR_TYPE_SF, +}; + struct ice_repr { struct ice_vsi *src_vsi; - struct ice_vf *vf; struct net_device *netdev; struct metadata_dst *dst; struct ice_esw_br_port *br_port; struct ice_repr_pcpu_stats __percpu *stats; u32 id; u8 parent_mac[ETH_ALEN]; + enum ice_repr_type type; + union { + struct ice_vf *vf; + struct ice_dynamic_port *sf; + }; + struct { + int (*add)(struct ice_repr *repr); + void (*rem)(struct ice_repr *repr); + int (*ready)(struct ice_repr *repr); + } ops; }; -struct ice_repr *ice_repr_add_vf(struct ice_vf *vf); -void ice_repr_rem_vf(struct ice_repr *repr); +struct ice_repr *ice_repr_create_vf(struct ice_vf *vf); +struct ice_repr *ice_repr_create_sf(struct ice_dynamic_port *sf); + +void ice_repr_destroy(struct ice_repr *repr); void ice_repr_start_tx_queues(struct ice_repr *repr); void ice_repr_stop_tx_queues(struct ice_repr *repr); diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index ecf8f5d60292..6ca13c5dcb14 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -28,9 +28,8 @@ ice_sched_add_root_node(struct ice_port_info *pi, if (!root) return -ENOMEM; - /* coverity[suspicious_sizeof] */ root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0], - sizeof(*root), GFP_KERNEL); + sizeof(*root->children), GFP_KERNEL); if (!root->children) { devm_kfree(ice_hw_to_dev(hw), root); return -ENOMEM; @@ -186,10 +185,9 @@ ice_sched_add_node(struct ice_port_info *pi, u8 layer, if (!node) return -ENOMEM; if (hw->max_children[layer]) { - /* coverity[suspicious_sizeof] */ node->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[layer], - sizeof(*node), GFP_KERNEL); + sizeof(*node->children), GFP_KERNEL); if (!node->children) { devm_kfree(ice_hw_to_dev(hw), node); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.c b/drivers/net/ethernet/intel/ice/ice_sf_eth.c new file mode 100644 index 000000000000..75d7147e1c01 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Intel Corporation. 
*/ +#include "ice.h" +#include "ice_lib.h" +#include "ice_txrx.h" +#include "ice_fltr.h" +#include "ice_sf_eth.h" +#include "devlink/devlink_port.h" +#include "devlink/devlink.h" + +static const struct net_device_ops ice_sf_netdev_ops = { + .ndo_open = ice_open, + .ndo_stop = ice_stop, + .ndo_start_xmit = ice_start_xmit, + .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, + .ndo_change_mtu = ice_change_mtu, + .ndo_get_stats64 = ice_get_stats64, + .ndo_tx_timeout = ice_tx_timeout, + .ndo_bpf = ice_xdp, + .ndo_xdp_xmit = ice_xdp_xmit, + .ndo_xsk_wakeup = ice_xsk_wakeup, +}; + +/** + * ice_sf_cfg_netdev - Allocate, configure and register a netdev + * @dyn_port: subfunction associated with configured netdev + * @devlink_port: subfunction devlink port to be linked with netdev + * + * Return: 0 on success, negative value on failure + */ +static int ice_sf_cfg_netdev(struct ice_dynamic_port *dyn_port, + struct devlink_port *devlink_port) +{ + struct ice_vsi *vsi = dyn_port->vsi; + struct ice_netdev_priv *np; + struct net_device *netdev; + int err; + + netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, + vsi->alloc_rxq); + if (!netdev) + return -ENOMEM; + + SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev); + set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); + vsi->netdev = netdev; + np = netdev_priv(netdev); + np->vsi = vsi; + + ice_set_netdev_features(netdev); + + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG; + netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD; + + eth_hw_addr_set(netdev, dyn_port->hw_addr); + ether_addr_copy(netdev->perm_addr, dyn_port->hw_addr); + netdev->netdev_ops = &ice_sf_netdev_ops; + SET_NETDEV_DEVLINK_PORT(netdev, devlink_port); + + err = register_netdev(netdev); + if (err) { + free_netdev(netdev); + vsi->netdev = NULL; + return -ENOMEM; + } + set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + return 0; +} + +static void ice_sf_decfg_netdev(struct ice_vsi *vsi) +{ + unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); + free_netdev(vsi->netdev); + vsi->netdev = NULL; + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); +} + +/** + * ice_sf_dev_probe - subfunction driver probe function + * @adev: pointer to the auxiliary device + * @id: pointer to the auxiliary_device id + * + * Configure VSI and netdev resources for the subfunction device. + * + * Return: zero on success or an error code on failure. 
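One nit in ice_sf_cfg_netdev() above: a register_netdev() failure is reported as -ENOMEM no matter what actually went wrong. The conventional tail propagates the error instead; a suggested shape using the function's own locals:

	err = register_netdev(netdev);
	if (err) {
		free_netdev(netdev);
		vsi->netdev = NULL;
		return err;	/* keep register_netdev()'s real reason */
	}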
+ */ +static int ice_sf_dev_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct ice_sf_dev *sf_dev = ice_adev_to_sf_dev(adev); + struct ice_dynamic_port *dyn_port = sf_dev->dyn_port; + struct ice_vsi *vsi = dyn_port->vsi; + struct ice_pf *pf = dyn_port->pf; + struct device *dev = &adev->dev; + struct ice_sf_priv *priv; + struct devlink *devlink; + int err; + + vsi->type = ICE_VSI_SF; + vsi->port_info = pf->hw.port_info; + vsi->flags = ICE_VSI_FLAG_INIT; + + priv = ice_allocate_sf(&adev->dev, pf); + if (IS_ERR(priv)) { + dev_err(dev, "Subfunction devlink alloc failed"); + return PTR_ERR(priv); + } + + priv->dev = sf_dev; + sf_dev->priv = priv; + devlink = priv_to_devlink(priv); + + devl_lock(devlink); + + err = ice_vsi_cfg(vsi); + if (err) { + dev_err(dev, "Subfunction vsi config failed"); + goto err_free_devlink; + } + vsi->sf = dyn_port; + + ice_eswitch_update_repr(&dyn_port->repr_id, vsi); + + err = ice_devlink_create_sf_dev_port(sf_dev); + if (err) { + dev_err(dev, "Cannot add ice virtual devlink port for subfunction"); + goto err_vsi_decfg; + } + + err = ice_sf_cfg_netdev(dyn_port, &sf_dev->priv->devlink_port); + if (err) { + dev_err(dev, "Subfunction netdev config failed"); + goto err_devlink_destroy; + } + + err = devl_port_fn_devlink_set(&dyn_port->devlink_port, devlink); + if (err) { + dev_err(dev, "Can't link devlink instance to SF devlink port"); + goto err_netdev_decfg; + } + + ice_napi_add(vsi); + + devl_register(devlink); + devl_unlock(devlink); + + dyn_port->attached = true; + + return 0; + +err_netdev_decfg: + ice_sf_decfg_netdev(vsi); +err_devlink_destroy: + ice_devlink_destroy_sf_dev_port(sf_dev); +err_vsi_decfg: + ice_vsi_decfg(vsi); +err_free_devlink: + devl_unlock(devlink); + devlink_free(devlink); + return err; +} + +/** + * ice_sf_dev_remove - subfunction driver remove function + * @adev: pointer to the auxiliary device + * + * Deinitialize VSI and netdev resources for the subfunction device. + */ +static void ice_sf_dev_remove(struct auxiliary_device *adev) +{ + struct ice_sf_dev *sf_dev = ice_adev_to_sf_dev(adev); + struct ice_dynamic_port *dyn_port = sf_dev->dyn_port; + struct ice_vsi *vsi = dyn_port->vsi; + struct devlink *devlink; + + devlink = priv_to_devlink(sf_dev->priv); + devl_lock(devlink); + + ice_vsi_close(vsi); + + ice_sf_decfg_netdev(vsi); + ice_devlink_destroy_sf_dev_port(sf_dev); + devl_unregister(devlink); + devl_unlock(devlink); + devlink_free(devlink); + ice_vsi_decfg(vsi); + + dyn_port->attached = false; +} + +static const struct auxiliary_device_id ice_sf_dev_id_table[] = { + { .name = "ice.sf", }, + { }, +}; + +MODULE_DEVICE_TABLE(auxiliary, ice_sf_dev_id_table); + +static struct auxiliary_driver ice_sf_driver = { + .name = "sf", + .probe = ice_sf_dev_probe, + .remove = ice_sf_dev_remove, + .id_table = ice_sf_dev_id_table +}; + +static DEFINE_XARRAY_ALLOC1(ice_sf_aux_id); + +/** + * ice_sf_driver_register - Register new auxiliary subfunction driver + * + * Return: zero on success or an error code on failure.
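The auxiliary bus matches ice_sf_dev_id_table's "ice.sf" against devices named "<parent module>.<adev name>", i.e. the "sf" devices created below under the ice module. The register/unregister helpers that close this block are thin wrappers, presumably invoked from the ice module's own init/exit paths (not shown in this hunk); the shape is the standard one:

/* Sketch: wiring an auxiliary driver into module init/exit.
 * The surrounding module code is assumed, not shown in the patch.
 */
static int __init foo_module_init(void)
{
	int err;

	err = ice_sf_driver_register();	/* auxiliary_driver_register() inside */
	if (err)
		return err;

	/* ... remaining module initialization ... */
	return 0;
}

static void __exit foo_module_exit(void)
{
	ice_sf_driver_unregister();	/* auxiliary_driver_unregister() inside */
}

module_init(foo_module_init);
module_exit(foo_module_exit);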
+ */ +int ice_sf_driver_register(void) +{ + return auxiliary_driver_register(&ice_sf_driver); +} + +/** + * ice_sf_driver_unregister - Unregister new auxiliary subfunction driver + * + */ +void ice_sf_driver_unregister(void) +{ + auxiliary_driver_unregister(&ice_sf_driver); +} + +/** + * ice_sf_dev_release - Release device associated with auxiliary device + * @device: pointer to the device + * + * Since most of the code for subfunction deactivation is handled in + * the remove handler, here just free tracking resources. + */ +static void ice_sf_dev_release(struct device *device) +{ + struct auxiliary_device *adev = to_auxiliary_dev(device); + struct ice_sf_dev *sf_dev = ice_adev_to_sf_dev(adev); + + xa_erase(&ice_sf_aux_id, adev->id); + kfree(sf_dev); +} + +/** + * ice_sf_eth_activate - Activate Ethernet subfunction port + * @dyn_port: the dynamic port instance for this subfunction + * @extack: extack for reporting error messages + * + * Activate the dynamic port as an Ethernet subfunction. Setup the netdev + * resources associated and initialize the auxiliary device. + * + * Return: zero on success or an error code on failure. + */ +int +ice_sf_eth_activate(struct ice_dynamic_port *dyn_port, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = dyn_port->pf; + struct ice_sf_dev *sf_dev; + struct pci_dev *pdev; + int err; + u32 id; + + err = xa_alloc(&ice_sf_aux_id, &id, NULL, xa_limit_32b, + GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Could not allocate SF ID"); + return err; + } + + sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL); + if (!sf_dev) { + err = -ENOMEM; + NL_SET_ERR_MSG_MOD(extack, "Could not allocate SF memory"); + goto xa_erase; + } + pdev = pf->pdev; + + sf_dev->dyn_port = dyn_port; + sf_dev->adev.id = id; + sf_dev->adev.name = "sf"; + sf_dev->adev.dev.release = ice_sf_dev_release; + sf_dev->adev.dev.parent = &pdev->dev; + + err = auxiliary_device_init(&sf_dev->adev); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize SF device"); + goto sf_dev_free; + } + + err = auxiliary_device_add(&sf_dev->adev); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to add SF device"); + goto aux_dev_uninit; + } + + dyn_port->sf_dev = sf_dev; + + return 0; + +aux_dev_uninit: + auxiliary_device_uninit(&sf_dev->adev); +sf_dev_free: + kfree(sf_dev); +xa_erase: + xa_erase(&ice_sf_aux_id, id); + + return err; +} + +/** + * ice_sf_eth_deactivate - Deactivate Ethernet subfunction port + * @dyn_port: the dynamic port instance for this subfunction + * + * Deactivate the Ethernet subfunction, removing its auxiliary device and the + * associated resources. + */ +void ice_sf_eth_deactivate(struct ice_dynamic_port *dyn_port) +{ + struct ice_sf_dev *sf_dev = dyn_port->sf_dev; + + auxiliary_device_delete(&sf_dev->adev); + auxiliary_device_uninit(&sf_dev->adev); +} diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.h b/drivers/net/ethernet/intel/ice/ice_sf_eth.h new file mode 100644 index 000000000000..c558cad0a183 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024, Intel Corporation. 
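Two ownership rules carry the activate/release pair above: the xarray hands out a unique ID that only the release callback returns, and once auxiliary_device_init() has succeeded the final kfree() belongs to that release callback, which runs when the device's last reference is put. A compact sketch of the contract, with foo_* placeholders:

#include <linux/auxiliary_bus.h>
#include <linux/slab.h>
#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(foo_ids);

struct foo_dev { struct auxiliary_device adev; };

static void foo_release(struct device *device)
{
	struct auxiliary_device *adev = to_auxiliary_dev(device);
	struct foo_dev *fdev = container_of(adev, struct foo_dev, adev);

	/* last reference dropped: give back the ID, then the memory */
	xa_erase(&foo_ids, adev->id);
	kfree(fdev);
}

static int foo_activate(struct foo_dev *fdev)
{
	int err;

	err = auxiliary_device_init(&fdev->adev);
	if (err)
		return err;		/* init failed: caller still owns fdev */

	err = auxiliary_device_add(&fdev->adev);
	if (err)
		auxiliary_device_uninit(&fdev->adev); /* release() now frees */

	return err;
}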
*/ + +#ifndef _ICE_SF_ETH_H_ +#define _ICE_SF_ETH_H_ + +#include <linux/auxiliary_bus.h> +#include "ice.h" + +struct ice_sf_dev { + struct auxiliary_device adev; + struct ice_dynamic_port *dyn_port; + struct ice_sf_priv *priv; +}; + +struct ice_sf_priv { + struct ice_sf_dev *dev; + struct devlink_port devlink_port; +}; + +static inline struct +ice_sf_dev *ice_adev_to_sf_dev(struct auxiliary_device *adev) +{ + return container_of(adev, struct ice_sf_dev, adev); +} + +int ice_sf_driver_register(void); +void ice_sf_driver_unregister(void); + +int ice_sf_eth_activate(struct ice_dynamic_port *dyn_port, + struct netlink_ext_ack *extack); +void ice_sf_eth_deactivate(struct ice_dynamic_port *dyn_port); +#endif /* _ICE_SF_ETH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.c new file mode 100644 index 000000000000..3d7e96721cf9 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023, Intel Corporation. */ + +#include "ice_vsi_vlan_ops.h" +#include "ice_vsi_vlan_lib.h" +#include "ice_vlan_mode.h" +#include "ice.h" +#include "ice_sf_vsi_vlan_ops.h" + +void ice_sf_vsi_init_vlan_ops(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + + if (ice_is_dvm_ena(&vsi->back->hw)) + vlan_ops = &vsi->outer_vlan_ops; + else + vlan_ops = &vsi->inner_vlan_ops; + + vlan_ops->add_vlan = ice_vsi_add_vlan; + vlan_ops->del_vlan = ice_vsi_del_vlan; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.h new file mode 100644 index 000000000000..8c44eafceea0 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023, Intel Corporation. 
*/ + +#ifndef _ICE_SF_VSI_VLAN_OPS_H_ +#define _ICE_SF_VSI_VLAN_OPS_H_ + +#include "ice_vsi_vlan_ops.h" + +struct ice_vsi; + +void ice_sf_vsi_init_vlan_ops(struct ice_vsi *vsi); + +#endif /* _ICE_SF_VSI_VLAN_OPS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 55ef33208456..e34fe2516ccc 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -175,7 +175,7 @@ void ice_free_vfs(struct ice_pf *pf) ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); - ice_eswitch_detach(pf, vf); + ice_eswitch_detach_vf(pf, vf); ice_dis_vf_qs(vf); if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { @@ -598,7 +598,7 @@ static int ice_start_vfs(struct ice_pf *pf) goto teardown; } - retval = ice_eswitch_attach(pf, vf); + retval = ice_eswitch_attach_vf(pf, vf); if (retval) { dev_err(ice_pf_to_dev(pf), "Failed to attach VF %d to eswitch, error %d", vf->vf_id, retval); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index fe8847184cb1..79d91e95358c 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -3194,7 +3194,7 @@ ice_add_update_vsi_list(struct ice_hw *hw, /* A rule already exists with the new VSI being added */ if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map)) - return 0; + return -EEXIST; /* Update the previously created VSI list set with * the new VSI ID passed in @@ -3264,7 +3264,7 @@ ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle, list_head = &sw->recp_list[recp_id].filt_rules; list_for_each_entry(list_itr, list_head, list_entry) { - if (list_itr->vsi_list_info) { + if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) { map_info = list_itr->vsi_list_info; if (test_bit(vsi_handle, map_info->vsi_map)) { *vsi_list_id = map_info->vsi_list_id; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8bb743f78fcb..8208055d6e7f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -456,7 +456,7 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - rx_ring->xdp_prog = NULL; + WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; @@ -522,30 +522,6 @@ err: } /** - * ice_rx_frame_truesize - * @rx_ring: ptr to Rx ring - * @size: size - * - * calculate the truesize with taking into the account PAGE_SIZE of - * underlying arch - */ -static unsigned int -ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = rx_ring->rx_offset ? 
- SKB_DATA_ALIGN(rx_ring->rx_offset + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; -} - -/** * ice_run_xdp - Executes an XDP program on initialized xdp_buff * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program @@ -837,16 +813,15 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (!dev_page_is_reusable(page)) return false; -#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) return false; -#else +#if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; -#endif /* PAGE_SIZE < 8192) */ +#endif /* PAGE_SIZE >= 8192) */ /* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the @@ -949,12 +924,7 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = -#if (PAGE_SIZE < 8192) - page_count(rx_buf->page); -#else - 0; -#endif + rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -1160,11 +1130,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) bool failure; u32 first; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); -#endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1223,10 +1188,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); -#endif xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { break; @@ -1521,10 +1482,11 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ ice_for_each_tx_ring(tx_ring, q_vector->tx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(tx_ring->xsk_pool); bool wd; - if (tx_ring->xsk_pool) - wd = ice_xmit_zc(tx_ring); + if (xsk_pool) + wd = ice_xmit_zc(tx_ring, xsk_pool); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1550,6 +1512,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) budget_per_ring = budget; ice_for_each_rx_ring(rx_ring, q_vector->rx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(rx_ring->xsk_pool); int cleaned; /* A dedicated path for zero-copy allows making a single @@ -1557,7 +1520,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * ice_clean_rx_irq function and makes the codebase cleaner. */ cleaned = rx_ring->xsk_pool ? 
- ice_clean_rx_irq_zc(rx_ring, budget_per_ring) : + ice_clean_rx_irq_zc(rx_ring, xsk_pool, budget_per_ring) : ice_clean_rx_irq(rx_ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ @@ -2405,7 +2368,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) ICE_TXD_CTX_QW1_CMD_S); ice_tstamp(tx_ring, skb, first, &offload); - if (ice_is_switchdev_running(vsi->back)) + if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF) ice_eswitch_set_target_vsi(skb, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 96037bef3e78..45768796691f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -61,6 +61,7 @@ static inline u32 ice_round_to_num(u32 N, u32 R) ICE_DBG_AQ_DESC | \ ICE_DBG_AQ_DESC_BUF | \ ICE_DBG_AQ_CMD) +#define ICE_DBG_PARSER BIT_ULL(28) #define ICE_DBG_USER BIT_ULL(31) @@ -158,6 +159,7 @@ enum ice_vsi_type { ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */ ICE_VSI_CHNL = 4, ICE_VSI_LB = 6, + ICE_VSI_SF = 9, }; struct ice_link_status { diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 5635e9da2212..a69e91f88d81 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -766,7 +766,7 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); - ice_eswitch_detach(pf, vf); + ice_eswitch_detach_vf(pf, vf); vf->driver_caps = 0; ice_vc_set_default_allowlist(vf); @@ -782,7 +782,7 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_vf_rebuild_vsi(vf); ice_vf_post_vsi_rebuild(vf); - ice_eswitch_attach(pf, vf); + ice_eswitch_attach_vf(pf, vf); mutex_unlock(&vf->cfg_lock); } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index fec16919ec19..be4266899690 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -12,6 +12,7 @@ #include <net/devlink.h> #include <linux/avf/virtchnl.h> #include "ice_type.h" +#include "ice_flow.h" #include "ice_virtchnl_fdir.h" #include "ice_vsi_vlan_ops.h" @@ -52,6 +53,12 @@ struct ice_mdd_vf_events { u16 last_printed; }; +/* Structure to store fdir fv entry */ +struct ice_fdir_prof_info { + struct ice_parser_profile prof; + u64 fdir_active_cnt; +}; + /* VF operations */ struct ice_vf_ops { enum ice_disq_rst_src reset_type; @@ -91,6 +98,7 @@ struct ice_vf { u16 lan_vsi_idx; /* index into PF struct */ u16 ctrl_vsi_idx; struct ice_vf_fdir fdir; + struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS]; /* first vector index of this VF in the PF space */ int first_vector_idx; struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 1c6ce0c4ed4e..59f62306b9cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -461,6 +461,10 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_TC_U32 && + vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_FDIR_PF) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_TC_U32; + if (vf->driver_caps & 
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index b4feb0927687..14e3f0f89c78 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -26,6 +26,15 @@ enum ice_fdir_tunnel_type { ICE_FDIR_TUNNEL_TYPE_NONE = 0, ICE_FDIR_TUNNEL_TYPE_GTPU, ICE_FDIR_TUNNEL_TYPE_GTPU_EH, + ICE_FDIR_TUNNEL_TYPE_ECPRI, + ICE_FDIR_TUNNEL_TYPE_GTPU_INNER, + ICE_FDIR_TUNNEL_TYPE_GTPU_EH_INNER, + ICE_FDIR_TUNNEL_TYPE_GRE, + ICE_FDIR_TUNNEL_TYPE_GTPOGRE, + ICE_FDIR_TUNNEL_TYPE_GTPOGRE_INNER, + ICE_FDIR_TUNNEL_TYPE_GRE_INNER, + ICE_FDIR_TUNNEL_TYPE_L2TPV2, + ICE_FDIR_TUNNEL_TYPE_L2TPV2_INNER, }; struct virtchnl_fdir_fltr_conf { @@ -33,6 +42,11 @@ struct virtchnl_fdir_fltr_conf { enum ice_fdir_tunnel_type ttype; u64 inset_flag; u32 flow_id; + + struct ice_parser_profile *prof; + bool parser_ena; + u8 *pkt_buf; + u8 pkt_len; }; struct virtchnl_fdir_inset_map { @@ -787,6 +801,107 @@ err_exit: } /** + * ice_vc_fdir_is_raw_flow - check if FDIR flow is raw (binary) + * @proto: virtchnl protocol headers + * + * Check if the FDIR rule is raw flow (protocol agnostic flow) or not. Note + * that common FDIR rule must have non-zero proto->count. Thus, we choose the + * tunnel_level and count of proto as the indicators. If both tunnel_level and + * count of proto are zero, this FDIR rule will be regarded as raw flow. + * + * Returns: true if headers describe raw flow, false otherwise. + */ +static bool +ice_vc_fdir_is_raw_flow(struct virtchnl_proto_hdrs *proto) +{ + return (proto->tunnel_level == 0 && proto->count == 0); +} + +/** + * ice_vc_fdir_parse_raw - parse a virtchnl raw FDIR rule + * @vf: pointer to the VF info + * @proto: virtchnl protocol headers + * @conf: FDIR configuration for each filter + * + * Parse the virtual channel filter's raw flow and store it in @conf + * + * Return: 0 on success or negative errno on failure. 
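ice_vc_fdir_parse_raw() below also shows the scope-based cleanup idiom: msk_buf is declared __free(kfree), so it is released automatically on every return path, while pkt_buf stays manually managed because its ownership moves into @conf on success. A minimal sketch of the idiom, assuming only what <linux/cleanup.h> documents:

#include <linux/cleanup.h>
#include <linux/slab.h>

static int foo_parse(size_t len)
{
	/* kfree(tmp) runs automatically when tmp goes out of scope */
	u8 *tmp __free(kfree) = kzalloc(len, GFP_KERNEL);

	if (!tmp)
		return -ENOMEM;

	/* ... use tmp; no explicit kfree() on any return path ... */
	return 0;
}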
+ */ +static int +ice_vc_fdir_parse_raw(struct ice_vf *vf, + struct virtchnl_proto_hdrs *proto, + struct virtchnl_fdir_fltr_conf *conf) +{ + u8 *pkt_buf, *msk_buf __free(kfree); + struct ice_parser_result rslt; + struct ice_pf *pf = vf->pf; + struct ice_parser *psr; + int status = -ENOMEM; + struct ice_hw *hw; + u16 udp_port = 0; + + pkt_buf = kzalloc(proto->raw.pkt_len, GFP_KERNEL); + msk_buf = kzalloc(proto->raw.pkt_len, GFP_KERNEL); + if (!pkt_buf || !msk_buf) + goto err_mem_alloc; + + memcpy(pkt_buf, proto->raw.spec, proto->raw.pkt_len); + memcpy(msk_buf, proto->raw.mask, proto->raw.pkt_len); + + hw = &pf->hw; + + /* Get raw profile info via Parser Lib */ + psr = ice_parser_create(hw); + if (IS_ERR(psr)) { + status = PTR_ERR(psr); + goto err_mem_alloc; + } + + ice_parser_dvm_set(psr, ice_is_dvm_ena(hw)); + + if (ice_get_open_tunnel_port(hw, &udp_port, TNL_VXLAN)) + ice_parser_vxlan_tunnel_set(psr, udp_port, true); + + status = ice_parser_run(psr, pkt_buf, proto->raw.pkt_len, &rslt); + if (status) + goto err_parser_destroy; + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_parser_result_dump(hw, &rslt); + + conf->prof = kzalloc(sizeof(*conf->prof), GFP_KERNEL); + if (!conf->prof) { + status = -ENOMEM; + goto err_parser_destroy; + } + + status = ice_parser_profile_init(&rslt, pkt_buf, msk_buf, + proto->raw.pkt_len, ICE_BLK_FD, + conf->prof); + if (status) + goto err_parser_profile_init; + + if (hw->debug_mask & ICE_DBG_PARSER) + ice_parser_profile_dump(hw, conf->prof); + + /* Store raw flow info into @conf */ + conf->pkt_len = proto->raw.pkt_len; + conf->pkt_buf = pkt_buf; + conf->parser_ena = true; + + ice_parser_destroy(psr); + return 0; + +err_parser_profile_init: + kfree(conf->prof); +err_parser_destroy: + ice_parser_destroy(psr); +err_mem_alloc: + kfree(pkt_buf); + return status; +} + +/** * ice_vc_fdir_parse_pattern * @vf: pointer to the VF info * @fltr: virtual channel add cmd buffer @@ -813,6 +928,10 @@ ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr, return -EINVAL; } + /* For raw FDIR filters created by the parser */ + if (ice_vc_fdir_is_raw_flow(proto)) + return ice_vc_fdir_parse_raw(vf, proto, conf); + for (i = 0; i < proto->count; i++) { struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i]; struct ip_esp_hdr *esph; @@ -1101,8 +1220,10 @@ ice_vc_validate_fdir_fltr(struct ice_vf *vf, struct virtchnl_fdir_add *fltr, struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs; int ret; - if (!ice_vc_validate_pattern(vf, proto)) - return -EINVAL; + /* For raw FDIR filters created by the parser */ + if (!ice_vc_fdir_is_raw_flow(proto)) + if (!ice_vc_validate_pattern(vf, proto)) + return -EINVAL; ret = ice_vc_fdir_parse_pattern(vf, fltr, conf); if (ret) @@ -1295,11 +1416,15 @@ static int ice_vc_fdir_write_fltr(struct ice_vf *vf, return -ENOMEM; ice_fdir_get_prgm_desc(hw, input, &desc, add); - ret = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun); - if (ret) { - dev_dbg(dev, "Gen training pkt for VF %d ptype %d failed\n", - vf->vf_id, input->flow_type); - goto err_free_pkt; + if (conf->parser_ena) { + memcpy(pkt, conf->pkt_buf, conf->pkt_len); + } else { + ret = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun); + if (ret) { + dev_dbg(dev, "Gen training pkt for VF %d ptype %d failed\n", + vf->vf_id, input->flow_type); + goto err_free_pkt; + } } ret = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt); @@ -1521,6 +1646,16 @@ err_exit: return ret; } +static int ice_fdir_is_tunnel(enum ice_fdir_tunnel_type ttype) +{ + return (ttype == 
ICE_FDIR_TUNNEL_TYPE_GRE_INNER || + ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_INNER || + ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_EH_INNER || + ttype == ICE_FDIR_TUNNEL_TYPE_GTPOGRE_INNER || + ttype == ICE_FDIR_TUNNEL_TYPE_ECPRI || + ttype == ICE_FDIR_TUNNEL_TYPE_L2TPV2_INNER); +} + +/** * ice_vc_add_fdir_fltr_post * @vf: pointer to the VF structure @@ -1782,6 +1917,158 @@ static void ice_vc_fdir_clear_irq_ctx(struct ice_vf *vf) } /** + * ice_vc_parser_fv_check_diff - check two parsed FDIR profile fv contexts + * @fv_a: struct of parsed FDIR profile field vector + * @fv_b: struct of parsed FDIR profile field vector + * + * Check if the two parsed FDIR profile field vector contexts are different, + * including proto_id, offset and mask. + * + * Return: true if they differ, false otherwise. + */ +static bool ice_vc_parser_fv_check_diff(struct ice_parser_fv *fv_a, + struct ice_parser_fv *fv_b) +{ + return (fv_a->proto_id != fv_b->proto_id || + fv_a->offset != fv_b->offset || + fv_a->msk != fv_b->msk); +} + +/** + * ice_vc_parser_fv_save - save parsed FDIR profile fv context + * @fv: struct of parsed FDIR profile field vector + * @fv_src: parsed FDIR profile field vector context to save + * + * Save the parsed FDIR profile field vector context, including proto_id, + * offset and mask. + * + * Return: Void. + */ +static void ice_vc_parser_fv_save(struct ice_parser_fv *fv, + struct ice_parser_fv *fv_src) +{ + fv->proto_id = fv_src->proto_id; + fv->offset = fv_src->offset; + fv->msk = fv_src->msk; + fv->spec = 0; +} + +/** + * ice_vc_add_fdir_raw - add a raw FDIR filter for VF + * @vf: pointer to the VF info + * @conf: FDIR configuration for each filter + * @v_ret: the final VIRTCHNL code + * @stat: pointer to the VIRTCHNL_OP_ADD_FDIR_FILTER + * @len: length of the stat + * + * Return: 0 on success or negative errno on failure.
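ice_vc_add_fdir_raw() below programs the parser profile into hardware only for the first filter that needs it; later filters whose field vectors compare equal just bump fdir_active_cnt. A reduced sketch of that reuse-or-program decision, with foo_* placeholders standing in for the ice types:

#define FOO_FV_WORDS 48

struct foo_fv { u8 proto_id; u16 offset; u16 msk; };

struct foo_prof_info {
	struct foo_fv fv[FOO_FV_WORDS];
	u64 active_cnt;
};

/* Returns true if an identical profile is already programmed and its
 * reference count was bumped; false if the caller must program HW.
 */
static bool foo_try_reuse_prof(struct foo_prof_info *pi,
			       const struct foo_fv *fv, int nwords)
{
	int i;

	if (!pi->active_cnt)
		return false;		/* nothing programmed yet */

	for (i = 0; i < nwords; i++)
		if (pi->fv[i].proto_id != fv[i].proto_id ||
		    pi->fv[i].offset != fv[i].offset ||
		    pi->fv[i].msk != fv[i].msk)
			return false;	/* different profile: program HW */

	pi->active_cnt++;		/* identical: share the HW profile */
	return true;
}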
+ */ +static int +ice_vc_add_fdir_raw(struct ice_vf *vf, + struct virtchnl_fdir_fltr_conf *conf, + enum virtchnl_status_code *v_ret, + struct virtchnl_fdir_add *stat, int len) +{ + struct ice_vsi *vf_vsi, *ctrl_vsi; + struct ice_fdir_prof_info *pi; + struct ice_pf *pf = vf->pf; + int ret, ptg, id, i; + struct device *dev; + struct ice_hw *hw; + bool fv_found; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + *v_ret = VIRTCHNL_STATUS_ERR_PARAM; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + + id = find_first_bit(conf->prof->ptypes, ICE_FLOW_PTYPE_MAX); + ptg = hw->blk[ICE_BLK_FD].xlt1.t[id]; + + vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %d\n", vf->vf_id); + return -ENODEV; + } + + ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx]; + if (!ctrl_vsi) { + dev_err(dev, "Can not get FDIR ctrl_vsi for VF %d\n", + vf->vf_id); + return -ENODEV; + } + + fv_found = false; + + /* Check if profile info already exists, then update the counter */ + pi = &vf->fdir_prof_info[ptg]; + if (pi->fdir_active_cnt != 0) { + for (i = 0; i < ICE_MAX_FV_WORDS; i++) + if (ice_vc_parser_fv_check_diff(&pi->prof.fv[i], + &conf->prof->fv[i])) + break; + if (i == ICE_MAX_FV_WORDS) { + fv_found = true; + pi->fdir_active_cnt++; + } + } + + /* HW profile setting is only required for the first time */ + if (!fv_found) { + ret = ice_flow_set_parser_prof(hw, vf_vsi->idx, + ctrl_vsi->idx, conf->prof, + ICE_BLK_FD); + + if (ret) { + *v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + dev_dbg(dev, "VF %d: insert hw prof failed\n", + vf->vf_id); + return ret; + } + } + + ret = ice_vc_fdir_insert_entry(vf, conf, &conf->flow_id); + if (ret) { + *v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + dev_dbg(dev, "VF %d: insert FDIR list failed\n", + vf->vf_id); + return ret; + } + + ret = ice_vc_fdir_set_irq_ctx(vf, conf, + VIRTCHNL_OP_ADD_FDIR_FILTER); + if (ret) { + dev_dbg(dev, "VF %d: set FDIR context failed\n", + vf->vf_id); + goto err_rem_entry; + } + + ret = ice_vc_fdir_write_fltr(vf, conf, true, false); + if (ret) { + dev_err(dev, "VF %d: adding FDIR raw flow rule failed, ret:%d\n", + vf->vf_id, ret); + goto err_clr_irq; + } + + /* Save parsed profile fv info of the FDIR rule for the first time */ + if (!fv_found) { + for (i = 0; i < conf->prof->fv_num; i++) + ice_vc_parser_fv_save(&pi->prof.fv[i], + &conf->prof->fv[i]); + pi->prof.fv_num = conf->prof->fv_num; + pi->fdir_active_cnt = 1; + } + + return 0; + +err_clr_irq: + ice_vc_fdir_clear_irq_ctx(vf); +err_rem_entry: + ice_vc_fdir_remove_entry(vf, conf, conf->flow_id); + return ret; +} + +/** * ice_vc_add_fdir_fltr - add a FDIR filter for VF by the msg buffer * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1846,7 +2133,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) len = sizeof(*stat); ret = ice_vc_validate_fdir_fltr(vf, fltr, conf); if (ret) { - v_ret = VIRTCHNL_STATUS_SUCCESS; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; stat->status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID; dev_dbg(dev, "Invalid FDIR filter from VF %d\n", vf->vf_id); goto err_free_conf; @@ -1861,6 +2148,15 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) goto exit; } + /* For raw FDIR filters created by the parser */ + if (conf->parser_ena) { + ret = ice_vc_add_fdir_raw(vf, conf, &v_ret, stat, len); + if (ret) + goto err_free_conf; + goto exit; + } + + is_tun = ice_fdir_is_tunnel(conf->ttype); ret = ice_vc_fdir_config_input_set(vf, fltr, conf, is_tun); if (ret) { v_ret = VIRTCHNL_STATUS_SUCCESS; @@ -1922,6 +2218,78 @@ err_exit: } /** + * ice_vc_del_fdir_raw - 
delete a raw FDIR filter for VF + * @vf: pointer to the VF info + * @conf: FDIR configuration for each filter + * @v_ret: the final VIRTCHNL code + * @stat: pointer to the VIRTCHNL_OP_DEL_FDIR_FILTER + * @len: length of the stat + * + * Return: 0 on success or negative errno on failure. + */ +static int +ice_vc_del_fdir_raw(struct ice_vf *vf, + struct virtchnl_fdir_fltr_conf *conf, + enum virtchnl_status_code *v_ret, + struct virtchnl_fdir_del *stat, int len) +{ + struct ice_vsi *vf_vsi, *ctrl_vsi; + enum ice_block blk = ICE_BLK_FD; + struct ice_fdir_prof_info *pi; + struct ice_pf *pf = vf->pf; + struct device *dev; + struct ice_hw *hw; + unsigned long id; + u16 vsi_num; + int ptg; + int ret; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + *v_ret = VIRTCHNL_STATUS_ERR_PARAM; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + + id = find_first_bit(conf->prof->ptypes, ICE_FLOW_PTYPE_MAX); + ptg = hw->blk[ICE_BLK_FD].xlt1.t[id]; + + ret = ice_vc_fdir_write_fltr(vf, conf, false, false); + if (ret) { + dev_err(dev, "VF %u: deleting FDIR raw flow rule failed: %d\n", + vf->vf_id, ret); + return ret; + } + + vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %u\n", vf->vf_id); + return -ENODEV; + } + + ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx]; + if (!ctrl_vsi) { + dev_err(dev, "Can not get FDIR ctrl_vsi for VF %u\n", + vf->vf_id); + return -ENODEV; + } + + pi = &vf->fdir_prof_info[ptg]; + if (pi->fdir_active_cnt != 0) { + pi->fdir_active_cnt--; + /* Remove the profile id flow if no active FDIR rule left */ + if (!pi->fdir_active_cnt) { + vsi_num = ice_get_hw_vsi_num(hw, ctrl_vsi->idx); + ice_rem_prof_id_flow(hw, blk, vsi_num, id); + + vsi_num = ice_get_hw_vsi_num(hw, vf_vsi->idx); + ice_rem_prof_id_flow(hw, blk, vsi_num, id); + } + } + + conf->parser_ena = false; + return 0; +} + +/** * ice_vc_del_fdir_fltr - delete a FDIR filter for VF by the msg buffer * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1933,7 +2301,10 @@ int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg) struct virtchnl_fdir_del *fltr = (struct virtchnl_fdir_del *)msg; struct virtchnl_fdir_del *stat = NULL; struct virtchnl_fdir_fltr_conf *conf; + struct ice_vf_fdir *fdir = &vf->fdir; enum virtchnl_status_code v_ret; + struct ice_fdir_fltr *input; + enum ice_fltr_ptype flow; struct device *dev; struct ice_pf *pf; int is_tun = 0; @@ -1983,6 +2354,15 @@ int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg) goto err_exit; } + /* For raw FDIR filters created by the parser */ + if (conf->parser_ena) { + ret = ice_vc_del_fdir_raw(vf, conf, &v_ret, stat, len); + if (ret) + goto err_del_tmr; + goto exit; + } + + is_tun = ice_fdir_is_tunnel(conf->ttype); ret = ice_vc_fdir_write_fltr(vf, conf, false, is_tun); if (ret) { v_ret = VIRTCHNL_STATUS_SUCCESS; @@ -1992,6 +2372,13 @@ int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg) goto err_del_tmr; } + /* Remove unused profiles to avoid unexpected behaviors */ + input = &conf->input; + flow = input->flow_type; + if (fdir->fdir_fltr_cnt[flow][is_tun] == 1) + ice_vc_fdir_rem_prof(vf, flow, is_tun); + +exit: kfree(stat); return ret; diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c index 7aae7fdcfcdb..8c7a9b41fb63 100644 --- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c @@ -3,6 +3,7 @@ #include "ice_pf_vsi_vlan_ops.h" #include "ice_vf_vsi_vlan_ops.h" +#include "ice_sf_vsi_vlan_ops.h" #include "ice_lib.h" 
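The delete path above is the mirror image of the add path: the filter is written out of hardware first, then the profile reference is dropped, and only when fdir_active_cnt reaches zero is the profile flow unhooked from both the control and the VF VSI. The drop side, continuing the foo_* sketch from the add path:

void foo_unprogram_hw_prof(void);	/* placeholder for the HW removal */

static void foo_drop_prof_ref(struct foo_prof_info *pi)
{
	if (!pi->active_cnt)
		return;			/* nothing to drop */

	if (--pi->active_cnt == 0)
		foo_unprogram_hw_prof();	/* last user is gone */
}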
#include "ice.h" @@ -77,6 +78,9 @@ void ice_vsi_init_vlan_ops(struct ice_vsi *vsi) case ICE_VSI_VF: ice_vf_vsi_init_vlan_ops(vsi); break; + case ICE_VSI_SF: + ice_sf_vsi_init_vlan_ops(vsi); + break; default: dev_dbg(ice_pf_to_dev(vsi->back), "%s does not support VLAN operations\n", ice_vsi_type_str(vsi->type)); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a65955eb23c0..334ae945d640 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -39,7 +39,7 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0, sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); } @@ -52,10 +52,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) { - synchronize_rcu(); + if (vsi->xdp_rings) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -112,25 +110,29 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, * ice_qvec_cfg_msix - Enable IRQ for given queue vector * @vsi: the VSI that contains queue vector * @q_vector: queue vector + * @qid: queue index */ static void -ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid) { u16 reg_idx = q_vector->reg_idx; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; + int q, _qid = qid; ice_cfg_itr(hw, q_vector); - ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx, - q_vector->tx.itr_idx); + for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_cfg_txq_interrupt(vsi, _qid, reg_idx, q_vector->tx.itr_idx); + _qid++; + } + + _qid = qid; - ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx, - q_vector->rx.itr_idx); + for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_cfg_rxq_interrupt(vsi, _qid, reg_idx, q_vector->rx.itr_idx); + _qid++; + } ice_flush(hw); } @@ -163,7 +165,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) struct ice_q_vector *q_vector; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; - int timeout = 50; + int fail = 0; int err; if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) @@ -173,40 +175,33 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) rx_ring = vsi->rx_rings[q_idx]; q_vector = rx_ring->q_vector; - while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) { - timeout--; - if (!timeout) - return -EBUSY; - usleep_range(1000, 2000); - } + synchronize_net(); + netif_carrier_off(vsi->netdev); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_qvec_dis_irq(vsi, rx_ring, q_vector); ice_qvec_toggle_napi(vsi, q_vector, false); - netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); - if (err) - return err; - if (ice_is_xdp_ena_vsi(vsi)) { + if (!fail) + fail = err; + if (vsi->xdp_rings) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; memset(&txq_meta, 0, 
sizeof(txq_meta)); ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; } - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); - if (err) - return err; + ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); - return 0; + return fail; } /** @@ -219,40 +214,47 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) { struct ice_q_vector *q_vector; + int fail = 0; + bool link_up; int err; err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; ice_set_ring_xdp(xdp_ring); ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_single_rxq(vsi, q_idx); - if (err) - return err; + if (!fail) + fail = err; q_vector = vsi->rx_rings[q_idx]->q_vector; - ice_qvec_cfg_msix(vsi, q_vector); + ice_qvec_cfg_msix(vsi, q_vector, q_idx); err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true); - if (err) - return err; + if (!fail) + fail = err; ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); - netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - clear_bit(ICE_CFG_BUSY, vsi->state); + /* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */ + synchronize_net(); + ice_get_link_status(vsi->port_info, &link_up); + if (link_up) { + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + netif_carrier_on(vsi->netdev); + } - return 0; + return fail; } /** @@ -287,7 +289,7 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) { int err; - if (vsi->type != ICE_VSI_PF) + if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_SF) return -EINVAL; if (qid >= vsi->netdev->real_num_rx_queues || @@ -379,7 +381,8 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) goto failure; } - if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); + if_running = !test_bit(ICE_VSI_DOWN, vsi->state) && + ice_is_xdp_ena_vsi(vsi); if (if_running) { struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; @@ -459,6 +462,7 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, /** * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Place the @count of descriptors onto Rx ring. Handle the ring wrap @@ -467,7 +471,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, * * Returns true if all allocations were successful, false if any fail. 
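The reworked ice_qp_dis()/ice_qp_ena() above deliberately stop returning on the first failure: each step records the first error in fail but the whole sequence runs to completion, so the queue pair always ends up fully torn down or fully restarted rather than half-configured. The shape of the pattern:

int foo_stop_tx(void);		/* placeholders for the per-step calls */
int foo_stop_rx(void);
void foo_clean_rings(void);

/* Run every step, remember only the first error, never bail early. */
static int foo_queue_pair_disable(void)
{
	int fail = 0, err;

	err = foo_stop_tx();
	if (!fail)
		fail = err;

	err = foo_stop_rx();
	if (!fail)
		fail = err;

	foo_clean_rings();	/* unconditional final cleanup */

	return fail;		/* 0, or the first error encountered */
}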
*/ -static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u32 nb_buffs_extra = 0, nb_buffs = 0; union ice_32b_rx_flex_desc *rx_desc; @@ -479,8 +484,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp = ice_xdp_buf(rx_ring, ntu); if (ntu + count >= rx_ring->count) { - nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, - rx_desc, + nb_buffs_extra = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, rx_ring->count - ntu); if (nb_buffs_extra != rx_ring->count - ntu) { ntu += nb_buffs_extra; @@ -493,7 +497,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) ice_release_rx_desc(rx_ring, 0); } - nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + nb_buffs = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, count); ntu += nb_buffs; if (ntu == rx_ring->count) @@ -509,6 +513,7 @@ exit: /** * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Wrapper for internal allocation routine; figure out how many tail @@ -516,7 +521,8 @@ exit: * * Returns true if all calls to internal alloc routine succeeded */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u16 rx_thresh = ICE_RING_QUARTER(rx_ring); u16 leftover, i, tail_bumps; @@ -525,9 +531,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) leftover = count - (tail_bumps * rx_thresh); for (i = 0; i < tail_bumps; i++) - if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + if (!__ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, rx_thresh)) return false; - return __ice_alloc_rx_bufs_zc(rx_ring, leftover); + return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover); } /** @@ -596,8 +602,10 @@ out: /** * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ * @xdp_ring: XDP Tx ring + * @xsk_pool: AF_XDP buffer pool pointer */ -static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) +static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *tx_desc; @@ -648,7 +656,7 @@ skip: if (xdp_ring->next_to_clean >= cnt) xdp_ring->next_to_clean -= cnt; if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + xsk_tx_completed(xsk_pool, xsk_frames); return completed_frames; } @@ -657,6 +665,7 @@ skip: * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX * @xdp: XDP buffer to xmit * @xdp_ring: XDP ring to produce descriptor onto + * @xsk_pool: AF_XDP buffer pool pointer * * note that this function works directly on xdp_buff, no need to convert * it to xdp_frame. 
xdp_buff pointer is stored to ice_tx_buf so that cleaning @@ -666,7 +675,8 @@ skip: * was not enough space on XDP ring */ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, - struct ice_tx_ring *xdp_ring) + struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { struct skb_shared_info *sinfo = NULL; u32 size = xdp->data_end - xdp->data; @@ -680,7 +690,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) - free_space += ice_clean_xdp_irq_zc(xdp_ring); + free_space += ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); if (unlikely(!free_space)) goto busy; @@ -700,7 +710,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, dma_addr_t dma; dma = xsk_buff_xdp_get_dma(xdp); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, size); tx_buf->xdp = xdp; tx_buf->type = ICE_TX_BUF_XSK_TX; @@ -742,12 +752,14 @@ busy: * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action + * @xsk_pool: AF_XDP buffer pool pointer * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { int err, result = ICE_XDP_PASS; u32 act; @@ -758,7 +770,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (!err) return ICE_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + if (xsk_uses_need_wakeup(xsk_pool) && err == -ENOBUFS) result = ICE_XDP_EXIT; else result = ICE_XDP_CONSUMED; @@ -769,7 +781,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: break; case XDP_TX: - result = ice_xmit_xdp_tx_zc(xdp, xdp_ring); + result = ice_xmit_xdp_tx_zc(xdp, xdp_ring, xsk_pool); if (result == ICE_XDP_CONSUMED) goto out_failure; break; @@ -821,14 +833,16 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, /** * ice_clean_rx_irq_zc - consumes packets from the hardware ring * @rx_ring: AF_XDP Rx ring + * @xsk_pool: AF_XDP buffer pool pointer * @budget: NAPI budget * * Returns number of processed packets on success, remaining budget on failure. 
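A theme running through these AF_XDP changes: ring->xsk_pool can now be swapped while the interface is live, so the NAPI path takes one READ_ONCE() snapshot per poll and threads it through every helper instead of re-reading the ring field. A sketch of the discipline, with foo_* placeholders:

struct foo_ring { struct xsk_buff_pool *xsk_pool; };

int foo_clean_rx(struct foo_ring *ring, int budget);	/* placeholders */
int foo_clean_rx_zc(struct foo_ring *ring, struct xsk_buff_pool *pool,
		    int budget);

static int foo_poll_rx(struct foo_ring *ring, int budget)
{
	/* one snapshot per poll; helpers never touch ring->xsk_pool */
	struct xsk_buff_pool *pool = READ_ONCE(ring->xsk_pool);

	return pool ? foo_clean_rx_zc(ring, pool, budget) :
		      foo_clean_rx(ring, budget);
}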
*/ -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool; u32 ntc = rx_ring->next_to_clean; u32 ntu = rx_ring->next_to_use; struct xdp_buff *first = NULL; @@ -891,7 +905,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring, + xsk_pool); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { xdp_xmit |= xdp_res; } else if (xdp_res == ICE_XDP_EXIT) { @@ -940,7 +955,8 @@ construct_skb: rx_ring->next_to_clean = ntc; entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring); if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) - failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); + failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, + entries_to_alloc); ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); @@ -963,17 +979,19 @@ construct_skb: /** * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @desc: AF_XDP descriptor to pull the DMA address and length from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, struct xdp_desc *desc, unsigned int *total_bytes) { struct ice_tx_desc *tx_desc; dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + dma = xsk_buff_raw_get_dma(xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, desc->len); tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -986,10 +1004,13 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, /** * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, unsigned int *total_bytes) { u16 ntu = xdp_ring->next_to_use; @@ -999,8 +1020,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); + dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len); tx_desc = ICE_TX_DESC(xdp_ring, ntu++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -1016,60 +1037,69 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de /** * ice_fill_tx_hw_ring - produce the 
number of Tx descriptors onto ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @nb_pkts: count of packets to be sent * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, - u32 nb_pkts, unsigned int *total_bytes) +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, u32 nb_pkts, + unsigned int *total_bytes) { u32 batched, leftover, i; batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); leftover = nb_pkts & (PKTS_PER_BATCH - 1); for (i = 0; i < batched; i += PKTS_PER_BATCH) - ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt_batch(xdp_ring, xsk_pool, &descs[i], total_bytes); for (; i < batched + leftover; i++) - ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt(xdp_ring, xsk_pool, &descs[i], total_bytes); } /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: AF_XDP buffer pool pointer * * Returns true if there is no more work that needs to be done, false otherwise */ -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool) { - struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + struct xdp_desc *descs = xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; int budget; - ice_clean_xdp_irq_zc(xdp_ring); + ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); + + if (!netif_carrier_ok(xdp_ring->vsi->netdev) || + !netif_running(xdp_ring->vsi->netdev)) + return true; budget = ICE_DESC_UNUSED(xdp_ring); budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget); if (!nb_pkts) return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; - ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, descs, nb_processed, + &total_bytes); xdp_ring->next_to_use = 0; } - ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, - &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, &descs[nb_processed], + nb_pkts - nb_processed, &total_bytes); ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); - if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) - xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); + if (xsk_uses_need_wakeup(xsk_pool)) + xsk_set_tx_need_wakeup(xsk_pool); return nb_pkts < budget; } @@ -1091,7 +1121,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_VSI_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !netif_carrier_ok(netdev)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) @@ -1102,7 +1132,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, ring = vsi->rx_rings[queue_id]->xdp_ring; - if (!ring->xsk_pool) + if (!READ_ONCE(ring->xsk_pool)) return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index
6fa181f080ef..45adeb513253 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -20,16 +20,20 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); #else -static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring) +static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + struct xsk_buff_pool __always_unused *xsk_pool) { return false; } @@ -44,6 +48,7 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, static inline int ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, int __always_unused budget) { return 0; @@ -51,6 +56,7 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, u16 __always_unused count) { return false; diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c index 3df9935685e9..6c913a703df6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -97,8 +97,10 @@ static int idpf_intr_reg_init(struct idpf_vport *vport) intr->dyn_ctl = idpf_get_reg_addr(adapter, reg_vals[vec_id].dyn_ctl_reg); intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M; + intr->dyn_ctl_intena_msk_m = PF_GLINT_DYN_CTL_INTENA_MSK_M; intr->dyn_ctl_itridx_s = PF_GLINT_DYN_CTL_ITR_INDX_S; intr->dyn_ctl_intrvl_s = PF_GLINT_DYN_CTL_INTERVAL_S; + intr->dyn_ctl_wb_on_itr_m = PF_GLINT_DYN_CTL_WB_ON_ITR_M; spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, IDPF_PF_ITR_IDX_SPACING); diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5dbf2b4ba1b0..4f20343e49a9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -357,24 +357,11 @@ int idpf_intr_req(struct idpf_adapter *adapter) goto free_msix; } - if (adapter->req_vec_chunks) { - struct virtchnl2_vector_chunks *vchunks; - struct virtchnl2_alloc_vectors *ac; - - ac = adapter->req_vec_chunks; - vchunks = &ac->vchunks; - - num_vec_ids = idpf_get_vec_ids(adapter, vecids, total_vecs, - vchunks); - if (num_vec_ids < v_actual) { - err = -EINVAL; - goto free_vecids; - } - } else { - int i; - - for (i = 0; i < v_actual; i++) - vecids[i] = i; + num_vec_ids = idpf_get_vec_ids(adapter, vecids, total_vecs, + &adapter->req_vec_chunks->vchunks); + if (num_vec_ids < v_actual) { + err = -EINVAL; + goto free_vecids; } for (vector = 0; vector < v_actual; vector++) { @@ -900,8 +887,8 @@ static void idpf_vport_stop(struct idpf_vport *vport) 
vport->link_up = false; idpf_vport_intr_deinit(vport); - idpf_vport_intr_rel(vport); idpf_vport_queues_rel(vport); + idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; } @@ -1335,9 +1322,8 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up - * @alloc_res: allocate queue resources */ -static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +static int idpf_vport_open(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1350,45 +1336,43 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - if (alloc_res) { - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - } - err = idpf_vport_intr_alloc(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + return err; } + err = idpf_vport_queues_alloc(vport); + if (err) + goto intr_rel; + err = idpf_vport_queue_ids_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_vport_intr_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_queue_reg_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } idpf_rx_init_buf_tail(vport); @@ -1455,10 +1439,10 @@ unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); intr_deinit: idpf_vport_intr_deinit(vport); -intr_rel: - idpf_vport_intr_rel(vport); queues_rel: idpf_vport_queues_rel(vport); +intr_rel: + idpf_vport_intr_rel(vport); return err; } @@ -1539,7 +1523,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); + idpf_vport_open(vport); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1898,9 +1882,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto free_vport; } - err = idpf_vport_queues_alloc(new_vport); - if (err) - goto free_vport; if (current_state <= __IDPF_VPORT_DOWN) { idpf_send_delete_queues_msg(vport); } else { @@ -1932,17 +1913,23 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, err = idpf_set_real_num_queues(vport); if (err) - goto err_reset; + goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport, false); + err = idpf_vport_open(vport); kfree(new_vport); return err; err_reset: - idpf_vport_queues_rel(new_vport); + idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, + vport->num_rxq, vport->num_bufq); + +err_open: + if (current_state == __IDPF_VPORT_UP) + idpf_vport_open(vport); + free_vport: kfree(new_vport); @@ -2171,7 +2158,7 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - err = idpf_vport_open(vport, true); + err = 
idpf_vport_open(vport); idpf_vport_ctrl_unlock(netdev); diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index fe64febf7436..dfd7cf1d9aa0 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -2,6 +2,7 @@ /* Copyright (C) 2023 Intel Corporation */ #include <net/libeth/rx.h> +#include <net/libeth/tx.h> #include "idpf.h" @@ -224,6 +225,7 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, /* record length, and DMA address */ dma_unmap_len_set(tx_buf, len, size); dma_unmap_addr_set(tx_buf, dma, dma); + tx_buf->type = LIBETH_SQE_FRAG; /* align size to end of page */ max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1); @@ -237,14 +239,17 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, offsets, max_data, td_tag); - tx_desc++; - i++; - - if (i == tx_q->desc_count) { + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = &tx_q->tx_buf[0]; tx_desc = &tx_q->base_tx[0]; i = 0; + } else { + tx_buf++; + tx_desc++; } + tx_buf->type = LIBETH_SQE_EMPTY; + dma += max_data; size -= max_data; @@ -257,12 +262,14 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, size, td_tag); - tx_desc++; - i++; - if (i == tx_q->desc_count) { + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = &tx_q->tx_buf[0]; tx_desc = &tx_q->base_tx[0]; i = 0; + } else { + tx_buf++; + tx_desc++; } size = skb_frag_size(frag); @@ -270,8 +277,6 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, DMA_TO_DEVICE); - - tx_buf = &tx_q->tx_buf[i]; } skb_tx_timestamp(first->skb); @@ -282,13 +287,13 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, size, td_tag); - IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i); + first->type = LIBETH_SQE_SKB; + first->rs_idx = i; - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; + IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i); nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - netdev_tx_sent_queue(nq, first->bytecount); + netdev_tx_sent_queue(nq, first->bytes); idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); } @@ -306,8 +311,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq) struct idpf_base_tx_ctx_desc *ctx_desc; int ntu = txq->next_to_use; - memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf)); - txq->tx_buf[ntu].ctx_entry = true; + txq->tx_buf[ntu].type = LIBETH_SQE_CTX; ctx_desc = &txq->base_ctx[ntu]; @@ -371,6 +375,10 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, IDPF_TX_DESCS_FOR_CTX)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + return NETDEV_TX_BUSY; } @@ -396,11 +404,11 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, first->skb = skb; if (tso) { - first->gso_segs = offload.tso_segs; - first->bytecount = skb->len + ((first->gso_segs - 1) * offload.tso_hdr_len); + first->packets = offload.tso_segs; + first->bytes = skb->len + ((first->packets - 1) * offload.tso_hdr_len); } else { - first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); - first->gso_segs = 1; + first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + first->packets = 1; } idpf_tx_singleq_map(tx_q, first, &offload); @@ -420,10 +428,15 @@ out_drop: static bool 
idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget, int *cleaned) { - unsigned int total_bytes = 0, total_pkts = 0; + struct libeth_sq_napi_stats ss = { }; struct idpf_base_tx_desc *tx_desc; u32 budget = tx_q->clean_budget; s16 ntc = tx_q->next_to_clean; + struct libeth_cq_pp cp = { + .dev = tx_q->dev, + .ss = &ss, + .napi = napi_budget, + }; struct idpf_netdev_priv *np; struct idpf_tx_buf *tx_buf; struct netdev_queue *nq; @@ -441,47 +454,26 @@ static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget, * such. We can skip this descriptor since there is no buffer * to clean. */ - if (tx_buf->ctx_entry) { - /* Clear this flag here to avoid stale flag values when - * this buffer is used for actual data in the future. - * There are cases where the tx_buf struct / the flags - * field will not be cleared before being reused. - */ - tx_buf->ctx_entry = false; + if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) { + tx_buf->type = LIBETH_SQE_EMPTY; goto fetch_next_txq_desc; } - /* if next_to_watch is not set then no work pending */ - eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch; - if (!eop_desc) + if (unlikely(tx_buf->type != LIBETH_SQE_SKB)) break; - /* prevent any other reads prior to eop_desc */ + /* prevent any other reads prior to type */ smp_rmb(); + eop_desc = &tx_q->base_tx[tx_buf->rs_idx]; + /* if the descriptor isn't done, no work yet to do */ if (!(eop_desc->qw1 & cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE))) break; - /* clear next_to_watch to prevent false hangs */ - tx_buf->next_to_watch = NULL; - /* update the statistics for this packet */ - total_bytes += tx_buf->bytecount; - total_pkts += tx_buf->gso_segs; - - napi_consume_skb(tx_buf->skb, napi_budget); - - /* unmap skb header data */ - dma_unmap_single(tx_q->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - - /* clear tx_buf data */ - tx_buf->skb = NULL; - dma_unmap_len_set(tx_buf, len, 0); + libeth_tx_complete(tx_buf, &cp); /* unmap remaining buffers */ while (tx_desc != eop_desc) { @@ -495,13 +487,7 @@ static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget, } /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buf, len)) { - dma_unmap_page(tx_q->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - } + libeth_tx_complete(tx_buf, &cp); } /* update budget only if we did something */ @@ -521,11 +507,11 @@ fetch_next_txq_desc: ntc += tx_q->desc_count; tx_q->next_to_clean = ntc; - *cleaned += total_pkts; + *cleaned += ss.packets; u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_add(&tx_q->q_stats.packets, total_pkts); - u64_stats_add(&tx_q->q_stats.bytes, total_bytes); + u64_stats_add(&tx_q->q_stats.packets, ss.packets); + u64_stats_add(&tx_q->q_stats.bytes, ss.bytes); u64_stats_update_end(&tx_q->stats_sync); np = netdev_priv(tx_q->netdev); @@ -533,7 +519,7 @@ fetch_next_txq_desc: dont_wake = np->state != __IDPF_VPORT_UP || !netif_carrier_ok(tx_q->netdev); - __netif_txq_completed_wake(nq, total_pkts, total_bytes, + __netif_txq_completed_wake(nq, ss.packets, ss.bytes, IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, dont_wake); @@ -1134,8 +1120,10 @@ int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget) &work_done); /* If work not completed, return budget and polling will return */ - if (!clean_complete) + if (!clean_complete) { + idpf_vport_intr_set_wb_on_itr(q_vector); return budget; + } work_done = min_t(int, work_done, budget - 1); @@ 
-1144,6 +1132,8 @@ int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget) */ if (likely(napi_complete_done(napi, work_done))) idpf_vport_intr_update_itr_ena_irq(q_vector); + else + idpf_vport_intr_set_wb_on_itr(q_vector); return work_done; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index af2879f03b8d..d4e6f0e10487 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2,10 +2,19 @@ /* Copyright (C) 2023 Intel Corporation */ #include <net/libeth/rx.h> +#include <net/libeth/tx.h> #include "idpf.h" #include "idpf_virtchnl.h" +struct idpf_tx_stash { + struct hlist_node hlist; + struct libeth_sqe buf; +}; + +#define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) +LIBETH_SQE_CHECK_PRIV(u32); + static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, unsigned int count); @@ -61,41 +70,20 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) } /** - * idpf_tx_buf_rel - Release a Tx buffer - * @tx_q: the queue that owns the buffer - * @tx_buf: the buffer to free - */ -static void idpf_tx_buf_rel(struct idpf_tx_queue *tx_q, - struct idpf_tx_buf *tx_buf) -{ - if (tx_buf->skb) { - if (dma_unmap_len(tx_buf, len)) - dma_unmap_single(tx_q->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - dev_kfree_skb_any(tx_buf->skb); - } else if (dma_unmap_len(tx_buf, len)) { - dma_unmap_page(tx_q->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - } - - tx_buf->next_to_watch = NULL; - tx_buf->skb = NULL; - tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; - dma_unmap_len_set(tx_buf, len, 0); -} - -/** * idpf_tx_buf_rel_all - Free any empty Tx buffers * @txq: queue to be cleaned */ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) { + struct libeth_sq_napi_stats ss = { }; struct idpf_buf_lifo *buf_stack; - u16 i; + struct idpf_tx_stash *stash; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + struct hlist_node *tmp; + u32 i, tag; /* Buffers already cleared, nothing to do */ if (!txq->tx_buf) @@ -103,7 +91,7 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) /* Free all the Tx buffer sk_buffs */ for (i = 0; i < txq->desc_count; i++) - idpf_tx_buf_rel(txq, &txq->tx_buf[i]); + libeth_tx_complete(&txq->tx_buf[i], &cp); kfree(txq->tx_buf); txq->tx_buf = NULL; @@ -115,6 +103,20 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) if (!buf_stack->bufs) return; + /* + * If a Tx timeout occurred, there are potentially still bufs in the + * hash table, free them here. 
+ */ + hash_for_each_safe(txq->stash->sched_buf_hash, tag, tmp, stash, + hlist) { + if (!stash) + continue; + + libeth_tx_complete(&stash->buf, &cp); + hash_del(&stash->hlist); + idpf_buf_lifo_push(buf_stack, stash); + } + for (i = 0; i < buf_stack->size; i++) kfree(buf_stack->bufs[i]); @@ -131,6 +133,7 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) { idpf_tx_buf_rel_all(txq); + netdev_tx_reset_subqueue(txq->netdev, txq->idx); if (!txq->desc_ring) return; @@ -203,10 +206,6 @@ static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) if (!tx_q->tx_buf) return -ENOMEM; - /* Initialize tx_bufs with invalid completion tags */ - for (i = 0; i < tx_q->desc_count; i++) - tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; - if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) return 0; @@ -1656,37 +1655,6 @@ static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q) } /** - * idpf_tx_splitq_clean_hdr - Clean TX buffer resources for header portion of - * packet - * @tx_q: tx queue to clean buffer from - * @tx_buf: buffer to be cleaned - * @cleaned: pointer to stats struct to track cleaned packets/bytes - * @napi_budget: Used to determine if we are in netpoll - */ -static void idpf_tx_splitq_clean_hdr(struct idpf_tx_queue *tx_q, - struct idpf_tx_buf *tx_buf, - struct idpf_cleaned_stats *cleaned, - int napi_budget) -{ - napi_consume_skb(tx_buf->skb, napi_budget); - - if (dma_unmap_len(tx_buf, len)) { - dma_unmap_single(tx_q->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - - dma_unmap_len_set(tx_buf, len, 0); - } - - /* clear tx_buf data */ - tx_buf->skb = NULL; - - cleaned->bytes += tx_buf->bytecount; - cleaned->packets += tx_buf->gso_segs; -} - -/** * idpf_tx_clean_stashed_bufs - clean bufs that were stored for * out of order completions * @txq: queue to clean @@ -1696,33 +1664,28 @@ static void idpf_tx_splitq_clean_hdr(struct idpf_tx_queue *tx_q, */ static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, u16 compl_tag, - struct idpf_cleaned_stats *cleaned, + struct libeth_sq_napi_stats *cleaned, int budget) { struct idpf_tx_stash *stash; struct hlist_node *tmp_buf; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = cleaned, + .napi = budget, + }; /* Buffer completion */ hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, hlist, compl_tag) { - if (unlikely(stash->buf.compl_tag != (int)compl_tag)) + if (unlikely(idpf_tx_buf_compl_tag(&stash->buf) != compl_tag)) continue; - if (stash->buf.skb) { - idpf_tx_splitq_clean_hdr(txq, &stash->buf, cleaned, - budget); - } else if (dma_unmap_len(&stash->buf, len)) { - dma_unmap_page(txq->dev, - dma_unmap_addr(&stash->buf, dma), - dma_unmap_len(&stash->buf, len), - DMA_TO_DEVICE); - dma_unmap_len_set(&stash->buf, len, 0); - } + hash_del(&stash->hlist); + libeth_tx_complete(&stash->buf, &cp); /* Push shadow buf back onto stack */ idpf_buf_lifo_push(&txq->stash->buf_stack, stash); - - hash_del(&stash->hlist); } } @@ -1737,8 +1700,7 @@ static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, { struct idpf_tx_stash *stash; - if (unlikely(!dma_unmap_addr(tx_buf, dma) && - !dma_unmap_len(tx_buf, len))) + if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) return 0; stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); @@ -1751,29 +1713,27 @@ static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, /* Store buffer params in shadow buffer */ stash->buf.skb = tx_buf->skb; - stash->buf.bytecount = tx_buf->bytecount; 
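[Editor's note: the stashing scheme being reworked here parks flow-scheduled Tx buffers in a hash table keyed by completion tag until their (possibly out-of-order) completion arrives, drawing stash entries from a pre-allocated LIFO. Below is a minimal userspace C model of that bookkeeping, not driver code: malloc/free stand in for the LIFO, the chained hash stands in for the kernel hashtable, and all names are illustrative.

#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 64

struct stash {                          /* models struct idpf_tx_stash */
	struct stash *next;             /* hash-chain link (hlist_node in the driver) */
	unsigned int tag;               /* completion tag; the driver keeps it in sqe->priv */
	unsigned int bytes;             /* stats carried until the completion event */
};

static struct stash *buckets[NBUCKETS];

/* Park one buffer until its completion shows up
 * (the role of idpf_stash_flow_sch_buffers). */
static void stash_buf(unsigned int tag, unsigned int bytes)
{
	struct stash *s = malloc(sizeof(*s));   /* the driver pops a pre-allocated LIFO instead */

	if (!s)
		return;
	s->tag = tag;
	s->bytes = bytes;
	s->next = buckets[tag % NBUCKETS];
	buckets[tag % NBUCKETS] = s;
}

/* On a completion event, release everything stashed under that tag
 * (the role of idpf_tx_clean_stashed_bufs). */
static unsigned int clean_stashed(unsigned int tag)
{
	struct stash **pp = &buckets[tag % NBUCKETS];
	unsigned int bytes = 0;

	while (*pp) {
		struct stash *s = *pp;

		if (s->tag != tag) {            /* same bucket, different packet: skip */
			pp = &s->next;
			continue;
		}
		*pp = s->next;                  /* unlink: hash_del() in the driver */
		bytes += s->bytes;              /* the driver calls libeth_tx_complete() here */
		free(s);                        /* ...then pushes the entry back onto the LIFO */
	}

	return bytes;
}

int main(void)
{
	stash_buf(7, 1500);
	stash_buf(7, 60);
	stash_buf(9, 42);
	printf("tag 7 cleaned %u bytes\n", clean_stashed(7));   /* 1560 */
	return 0;
}

Because the driver's stash entries come from a fixed-size reserve rather than malloc, the reworked splitq clean path checks IDPF_TX_BUF_RSV_UNUSED up front and bails out early rather than losing buffer state when the reserve runs dry. End of note.]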
- stash->buf.gso_segs = tx_buf->gso_segs; + stash->buf.bytes = tx_buf->bytes; + stash->buf.packets = tx_buf->packets; + stash->buf.type = tx_buf->type; + stash->buf.nr_frags = tx_buf->nr_frags; dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); - stash->buf.compl_tag = tx_buf->compl_tag; + idpf_tx_buf_compl_tag(&stash->buf) = idpf_tx_buf_compl_tag(tx_buf); /* Add buffer to buf_hash table to be freed later */ hash_add(txq->stash->sched_buf_hash, &stash->hlist, - stash->buf.compl_tag); - - memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); + idpf_tx_buf_compl_tag(&stash->buf)); - /* Reinitialize buf_id portion of tag */ - tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + tx_buf->type = LIBETH_SQE_EMPTY; return 0; } #define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ do { \ - (ntc)++; \ - if (unlikely(!(ntc))) { \ - ntc -= (txq)->desc_count; \ + if (unlikely(++(ntc) == (txq)->desc_count)) { \ + ntc = 0; \ buf = (txq)->tx_buf; \ desc = &(txq)->flex_tx[0]; \ } else { \ @@ -1797,69 +1757,71 @@ do { \ * Separate packet completion events will be reported on the completion queue, * and the buffers will be cleaned separately. The stats are not updated from * this function when using flow-based scheduling. + * + * Furthermore, in flow scheduling mode, check to make sure there are enough + * reserve buffers to stash the packet. If there are not, return early, which + * will leave next_to_clean pointing to the packet that failed to be stashed. + * + * Return: false in the scenario above, true otherwise. */ -static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, +static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, int napi_budget, - struct idpf_cleaned_stats *cleaned, + struct libeth_sq_napi_stats *cleaned, bool descs_only) { union idpf_tx_flex_desc *next_pending_desc = NULL; union idpf_tx_flex_desc *tx_desc; - s16 ntc = tx_q->next_to_clean; + u32 ntc = tx_q->next_to_clean; + struct libeth_cq_pp cp = { + .dev = tx_q->dev, + .ss = cleaned, + .napi = napi_budget, + }; struct idpf_tx_buf *tx_buf; + bool clean_complete = true; tx_desc = &tx_q->flex_tx[ntc]; next_pending_desc = &tx_q->flex_tx[end]; tx_buf = &tx_q->tx_buf[ntc]; - ntc -= tx_q->desc_count; while (tx_desc != next_pending_desc) { - union idpf_tx_flex_desc *eop_desc; + u32 eop_idx; /* If this entry in the ring was used as a context descriptor, - * it's corresponding entry in the buffer ring will have an - * invalid completion tag since no buffer was used. We can - * skip this descriptor since there is no buffer to clean. + * its corresponding entry in the buffer ring is reserved. We + * can skip this descriptor since there is no buffer to clean. 
*/ - if (unlikely(tx_buf->compl_tag == IDPF_SPLITQ_TX_INVAL_COMPL_TAG)) + if (tx_buf->type <= LIBETH_SQE_CTX) goto fetch_next_txq_desc; - eop_desc = (union idpf_tx_flex_desc *)tx_buf->next_to_watch; + if (unlikely(tx_buf->type != LIBETH_SQE_SKB)) + break; - /* clear next_to_watch to prevent false hangs */ - tx_buf->next_to_watch = NULL; + eop_idx = tx_buf->rs_idx; if (descs_only) { - if (idpf_stash_flow_sch_buffers(tx_q, tx_buf)) + if (IDPF_TX_BUF_RSV_UNUSED(tx_q) < tx_buf->nr_frags) { + clean_complete = false; goto tx_splitq_clean_out; + } + + idpf_stash_flow_sch_buffers(tx_q, tx_buf); - while (tx_desc != eop_desc) { + while (ntc != eop_idx) { idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); - - if (dma_unmap_len(tx_buf, len)) { - if (idpf_stash_flow_sch_buffers(tx_q, - tx_buf)) - goto tx_splitq_clean_out; - } + idpf_stash_flow_sch_buffers(tx_q, tx_buf); } } else { - idpf_tx_splitq_clean_hdr(tx_q, tx_buf, cleaned, - napi_budget); + libeth_tx_complete(tx_buf, &cp); /* unmap remaining buffers */ - while (tx_desc != eop_desc) { + while (ntc != eop_idx) { idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buf, len)) { - dma_unmap_page(tx_q->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - } + libeth_tx_complete(tx_buf, &cp); } } @@ -1868,8 +1830,9 @@ fetch_next_txq_desc: } tx_splitq_clean_out: - ntc += tx_q->desc_count; tx_q->next_to_clean = ntc; + + return clean_complete; } #define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ @@ -1895,57 +1858,68 @@ do { \ * this completion tag. */ static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, - struct idpf_cleaned_stats *cleaned, + struct libeth_sq_napi_stats *cleaned, int budget) { u16 idx = compl_tag & txq->compl_tag_bufid_m; struct idpf_tx_buf *tx_buf = NULL; - u16 ntc = txq->next_to_clean; - u16 num_descs_cleaned = 0; - u16 orig_idx = idx; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = cleaned, + .napi = budget, + }; + u16 ntc, orig_idx = idx; tx_buf = &txq->tx_buf[idx]; - while (tx_buf->compl_tag == (int)compl_tag) { - if (tx_buf->skb) { - idpf_tx_splitq_clean_hdr(txq, tx_buf, cleaned, budget); - } else if (dma_unmap_len(tx_buf, len)) { - dma_unmap_page(txq->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - } + if (unlikely(tx_buf->type <= LIBETH_SQE_CTX || + idpf_tx_buf_compl_tag(tx_buf) != compl_tag)) + return false; - memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); - tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + if (tx_buf->type == LIBETH_SQE_SKB) + libeth_tx_complete(tx_buf, &cp); - num_descs_cleaned++; + idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + + while (idpf_tx_buf_compl_tag(tx_buf) == compl_tag) { + libeth_tx_complete(tx_buf, &cp); idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); } - /* If we didn't clean anything on the ring for this completion, there's - * nothing more to do. - */ - if (unlikely(!num_descs_cleaned)) - return false; - - /* Otherwise, if we did clean a packet on the ring directly, it's safe - * to assume that the descriptors starting from the original - * next_to_clean up until the previously cleaned packet can be reused. - * Therefore, we will go back in the ring and stash any buffers still - * in the ring into the hash table to be cleaned later. 
+ /* + * It's possible the packet we just cleaned was an out of order + * completion, which means we can stash the buffers starting from + * the original next_to_clean and reuse the descriptors. We need + * to compare the descriptor ring next_to_clean packet's "first" buffer + * to the "first" buffer of the packet we just cleaned to determine if + * this is the case. However, next_to_clean can point to either a + * reserved buffer that corresponds to a context descriptor used for the + * next_to_clean packet (TSO packet) or the "first" buffer (single + * packet). The orig_idx from the packet we just cleaned will always + * point to the "first" buffer. If next_to_clean points to a reserved + * buffer, let's bump ntc once and start the comparison from there. */ ntc = txq->next_to_clean; tx_buf = &txq->tx_buf[ntc]; - while (tx_buf != &txq->tx_buf[orig_idx]) { - idpf_stash_flow_sch_buffers(txq, tx_buf); + + if (tx_buf->type == LIBETH_SQE_CTX) idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); - } - /* Finally, update next_to_clean to reflect the work that was just done - * on the ring, if any. If the packet was only cleaned from the hash - * table, the ring will not be impacted, therefore we should not touch - * next_to_clean. The updated idx is used here + /* + * If ntc still points to a different "first" buffer, clean the + * descriptor ring and stash all of the buffers for later cleaning. If + * we cannot stash all of the buffers, next_to_clean will point to the + * "first" buffer of the packet that could not be stashed and cleaning + * will start there next time. + */ + if (unlikely(tx_buf != &txq->tx_buf[orig_idx] && + !idpf_tx_splitq_clean(txq, orig_idx, budget, cleaned, + true))) + return true; + + /* + * Otherwise, update next_to_clean to reflect the cleaning that was + * done above. 
*/ txq->next_to_clean = idx; @@ -1965,7 +1939,7 @@ static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, */ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, struct idpf_splitq_tx_compl_desc *desc, - struct idpf_cleaned_stats *cleaned, + struct libeth_sq_napi_stats *cleaned, int budget) { u16 compl_tag; @@ -1973,7 +1947,8 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, if (!idpf_queue_has(FLOW_SCH_EN, txq)) { u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); - return idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + return; } compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); @@ -2008,7 +1983,7 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, ntc -= complq->desc_count; do { - struct idpf_cleaned_stats cleaned_stats = { }; + struct libeth_sq_napi_stats cleaned_stats = { }; struct idpf_tx_queue *tx_q; int rel_tx_qid; u16 hw_head; @@ -2158,29 +2133,6 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, } /** - * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions - * @tx_q: the queue to be checked - * @size: number of descriptors we want to assure is available - * - * Returns 0 if stop is not needed - */ -int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size) -{ - struct netdev_queue *nq; - - if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) - return 0; - - u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.q_busy); - u64_stats_update_end(&tx_q->stats_sync); - - nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - - return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); -} - -/** * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked * @descs_needed: number of descriptors required for this packet @@ -2191,7 +2143,7 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, unsigned int descs_needed) { if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) - goto splitq_stop; + goto out; /* If there are too many outstanding completions expected on the * completion queue, stop the TX queue to give the device some time to @@ -2210,10 +2162,12 @@ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, return 0; splitq_stop: + netif_stop_subqueue(tx_q->netdev, tx_q->idx); + +out: u64_stats_update_begin(&tx_q->stats_sync); u64_stats_inc(&tx_q->q_stats.q_busy); u64_stats_update_end(&tx_q->stats_sync); - netif_stop_subqueue(tx_q->netdev, tx_q->idx); return -EBUSY; } @@ -2236,7 +2190,11 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); tx_q->next_to_use = val; - idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); + if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) { + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + } /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. 
(Only @@ -2307,6 +2265,12 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, struct idpf_tx_buf *first, u16 idx) { + struct libeth_sq_napi_stats ss = { }; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + u64_stats_update_begin(&txq->stats_sync); u64_stats_inc(&txq->q_stats.dma_map_errs); u64_stats_update_end(&txq->stats_sync); @@ -2316,7 +2280,7 @@ void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, struct idpf_tx_buf *tx_buf; tx_buf = &txq->tx_buf[idx]; - idpf_tx_buf_rel(txq, tx_buf); + libeth_tx_complete(tx_buf, &cp); if (tx_buf == first) break; if (idx == 0) @@ -2395,6 +2359,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); tx_buf = first; + first->nr_frags = 0; params->compl_tag = (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; @@ -2405,7 +2370,9 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, if (dma_mapping_error(tx_q->dev, dma)) return idpf_tx_dma_map_error(tx_q, skb, first, i); - tx_buf->compl_tag = params->compl_tag; + first->nr_frags++; + idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; + tx_buf->type = LIBETH_SQE_FRAG; /* record length, and DMA address */ dma_unmap_len_set(tx_buf, len, size); @@ -2459,14 +2426,15 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, max_data); - tx_desc++; - i++; - - if (i == tx_q->desc_count) { + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } else { + tx_buf++; + tx_desc++; } /* Since this packet has a buffer that is going to span @@ -2479,8 +2447,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, * simply pass over these holes and finish cleaning the * rest of the packet. */ - memset(&tx_q->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); - tx_q->tx_buf[i].compl_tag = params->compl_tag; + tx_buf->type = LIBETH_SQE_EMPTY; /* Adjust the DMA offset and the remaining size of the * fragment. 
On the first iteration of this loop, @@ -2504,13 +2471,15 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, break; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); - tx_desc++; - i++; - if (i == tx_q->desc_count) { + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } else { + tx_buf++; + tx_desc++; } size = skb_frag_size(frag); @@ -2518,26 +2487,24 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, DMA_TO_DEVICE); - - tx_buf = &tx_q->tx_buf[i]; } /* record SW timestamp if HW timestamp is not available */ skb_tx_timestamp(skb); + first->type = LIBETH_SQE_SKB; + /* write last descriptor with RS and EOP bits */ + first->rs_idx = i; td_cmd |= params->eop_cmd; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); i = idpf_tx_splitq_bump_ntu(tx_q, i); - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; - tx_q->txq_grp->num_completions_pending++; /* record bytecount for BQL */ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - netdev_tx_sent_queue(nq, first->bytecount); + netdev_tx_sent_queue(nq, first->bytes); idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); } @@ -2737,8 +2704,7 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq) struct idpf_flex_tx_ctx_desc *desc; int i = txq->next_to_use; - memset(&txq->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); - txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + txq->tx_buf[i].type = LIBETH_SQE_CTX; /* grab the next descriptor */ desc = &txq->flex_ctx[i]; @@ -2822,12 +2788,12 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, first->skb = skb; if (tso) { - first->gso_segs = tx_params.offload.tso_segs; - first->bytecount = skb->len + - ((first->gso_segs - 1) * tx_params.offload.tso_hdr_len); + first->packets = tx_params.offload.tso_segs; + first->bytes = skb->len + + ((first->packets - 1) * tx_params.offload.tso_hdr_len); } else { - first->gso_segs = 1; - first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + first->packets = 1; + first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { @@ -3576,9 +3542,7 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - int i, j, v_idx; - - for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; kfree(q_vector->complq); @@ -3593,26 +3557,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) free_cpumask_var(q_vector->affinity_mask); } - /* Clean up the mapping of queues to vectors */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - - if (idpf_is_queue_model_split(vport->rxq_model)) - for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) - rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; - else - for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - rx_qgrp->singleq.rxqs[j]->q_vector = NULL; - } - - if (idpf_is_queue_model_split(vport->txq_model)) - for (i = 0; i < vport->num_txq_grp; i++) - vport->txq_grps[i].complq->q_vector = NULL; - else - for (i = 0; i < vport->num_txq_grp; i++) - for (j = 0; j < vport->txq_grps[i].num_txq; j++) - vport->txq_grps[i].txqs[j]->q_vector = NULL; - kfree(vport->q_vectors); vport->q_vectors = NULL; } @@ -3771,6 +3715,7 @@ void 
idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /* net_dim() updates ITR out-of-band using a work item */ idpf_net_dim(q_vector); + q_vector->wb_on_itr = false; intval = idpf_vport_intr_buildreg_itr(q_vector, IDPF_NO_ITR_UPDATE_IDX, 0); @@ -3780,13 +3725,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /** * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport * @vport: main vport structure - * @basename: name for the vector */ -static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +static int idpf_vport_intr_req_irq(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + const char *drv_name, *if_name, *vec_name; int vector, err, irq_num, vidx; - const char *vec_name; + + drv_name = dev_driver_string(&adapter->pdev->dev); + if_name = netdev_name(vport->netdev); for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; @@ -3804,8 +3751,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue; - name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name, - vidx); + name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, + vec_name, vidx); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector); @@ -4071,8 +4018,10 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done); /* If work not completed, return budget and polling will return */ - if (!clean_complete) + if (!clean_complete) { + idpf_vport_intr_set_wb_on_itr(q_vector); return budget; + } work_done = min_t(int, work_done, budget - 1); @@ -4081,6 +4030,8 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) */ if (likely(napi_complete_done(napi, work_done))) idpf_vport_intr_update_itr_ena_irq(q_vector); + else + idpf_vport_intr_set_wb_on_itr(q_vector); /* Switch to poll mode in the tear-down path after sending disable * queues virtchnl message, as the interrupts will be disabled after @@ -4326,7 +4277,6 @@ error: */ int idpf_vport_intr_init(struct idpf_vport *vport) { - char *int_name; int err; err = idpf_vport_intr_init_vec_idx(vport); @@ -4340,11 +4290,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - int_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_driver_string(&vport->adapter->pdev->dev), - vport->netdev->name); - - err = idpf_vport_intr_req_irq(vport, int_name); + err = idpf_vport_intr_req_irq(vport); if (err) goto unroll_vectors_alloc; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 6215dbee5546..f0537826f840 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -127,11 +127,10 @@ do { \ */ #define IDPF_TX_COMPLQ_PENDING(txq) \ (((txq)->num_completions_pending >= (txq)->complq->num_completions ? \ - 0 : U64_MAX) + \ + 0 : U32_MAX) + \ (txq)->num_completions_pending - (txq)->complq->num_completions) #define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 -#define IDPF_SPLITQ_TX_INVAL_COMPL_TAG -1 /* Adjust the generation for the completion tag and wrap if necessary */ #define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? 
\ @@ -149,47 +148,7 @@ union idpf_tx_flex_desc { struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */ }; -/** - * struct idpf_tx_buf - * @next_to_watch: Next descriptor to clean - * @skb: Pointer to the skb - * @dma: DMA address - * @len: DMA length - * @bytecount: Number of bytes - * @gso_segs: Number of GSO segments - * @compl_tag: Splitq only, unique identifier for a buffer. Used to compare - * with completion tag returned in buffer completion event. - * Because the completion tag is expected to be the same in all - * data descriptors for a given packet, and a single packet can - * span multiple buffers, we need this field to track all - * buffers associated with this completion tag independently of - * the buf_id. The tag consists of a N bit buf_id and M upper - * order "generation bits". See compl_tag_bufid_m and - * compl_tag_gen_s in struct idpf_queue. We'll use a value of -1 - * to indicate the tag is not valid. - * @ctx_entry: Singleq only. Used to indicate the corresponding entry - * in the descriptor ring was used for a context descriptor and - * this buffer entry should be skipped. - */ -struct idpf_tx_buf { - void *next_to_watch; - struct sk_buff *skb; - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); - unsigned int bytecount; - unsigned short gso_segs; - - union { - int compl_tag; - - bool ctx_entry; - }; -}; - -struct idpf_tx_stash { - struct hlist_node hlist; - struct idpf_tx_buf buf; -}; +#define idpf_tx_buf libeth_sqe /** * struct idpf_buf_lifo - LIFO for managing OOO completions @@ -390,9 +349,11 @@ struct idpf_vec_regs { * struct idpf_intr_reg * @dyn_ctl: Dynamic control interrupt register * @dyn_ctl_intena_m: Mask for dyn_ctl interrupt enable + * @dyn_ctl_intena_msk_m: Mask for dyn_ctl interrupt enable mask * @dyn_ctl_itridx_s: Register bit offset for ITR index * @dyn_ctl_itridx_m: Mask for ITR index * @dyn_ctl_intrvl_s: Register bit offset for ITR interval + * @dyn_ctl_wb_on_itr_m: Mask for WB on ITR feature * @rx_itr: RX ITR register * @tx_itr: TX ITR register * @icr_ena: Interrupt cause register offset @@ -401,9 +362,11 @@ struct idpf_vec_regs { struct idpf_intr_reg { void __iomem *dyn_ctl; u32 dyn_ctl_intena_m; + u32 dyn_ctl_intena_msk_m; u32 dyn_ctl_itridx_s; u32 dyn_ctl_itridx_m; u32 dyn_ctl_intrvl_s; + u32 dyn_ctl_wb_on_itr_m; void __iomem *rx_itr; void __iomem *tx_itr; void __iomem *icr_ena; @@ -424,6 +387,7 @@ struct idpf_intr_reg { * @intr_reg: See struct idpf_intr_reg * @napi: napi handler * @total_events: Number of interrupts processed + * @wb_on_itr: whether WB on ITR is enabled * @tx_dim: Data for TX net_dim algorithm * @tx_itr_value: TX interrupt throttling rate * @tx_intr_mode: Dynamic ITR or not @@ -454,6 +418,7 @@ struct idpf_q_vector { __cacheline_group_begin_aligned(read_write); struct napi_struct napi; u16 total_events; + bool wb_on_itr; struct dim tx_dim; u16 tx_itr_value; @@ -472,7 +437,7 @@ struct idpf_q_vector { cpumask_var_t affinity_mask; __cacheline_group_end_aligned(cold); }; -libeth_cacheline_set_assert(struct idpf_q_vector, 104, +libeth_cacheline_set_assert(struct idpf_q_vector, 112, 424 + 2 * sizeof(struct dim), 8 + sizeof(cpumask_var_t)); @@ -496,11 +461,6 @@ struct idpf_tx_queue_stats { u64_stats_t dma_map_errs; }; -struct idpf_cleaned_stats { - u32 packets; - u32 bytes; -}; - #define IDPF_ITR_DYNAMIC 1 #define IDPF_ITR_MAX 0x1FE0 #define IDPF_ITR_20K 0x0032 @@ -688,7 +648,7 @@ struct idpf_tx_queue { void *desc_ring; }; - struct idpf_tx_buf *tx_buf; + struct libeth_sqe *tx_buf; struct idpf_txq_group *txq_grp; 
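[Editor's note: with idpf_tx_buf now an alias for libeth_sqe, the removed next_to_watch pointer, ctx_entry flag and -1 compl_tag sentinel all collapse into a single type field plus an rs_idx back-reference to the descriptor carrying the RS/EOP bit, which is what the clean loops earlier in this patch test (type <= LIBETH_SQE_CTX to skip, type != LIBETH_SQE_SKB to stop). A compilable userspace sketch of that shape follows; the enum values, names and the stubbed completion test are illustrative, not the libeth definitions.

#include <stdio.h>

/* Models the type ordering the clean loops rely on:
 * anything <= CTX carries no DMA mapping and is skipped. */
enum sqe_type { SQE_EMPTY, SQE_CTX, SQE_SKB, SQE_FRAG };

struct sqe {
	enum sqe_type type;
	unsigned int rs_idx;	/* head entry only: index of the EOP descriptor */
};

/* Advance next-to-clean over one ring entry or one whole packet.
 * Returns the new ntc, or ntc unchanged if the head packet is not
 * finished yet (hardware completion test stubbed out via "done"). */
static unsigned int clean_one(struct sqe *ring, unsigned int size,
			      unsigned int ntc, int done)
{
	struct sqe *buf = &ring[ntc];

	if (buf->type <= SQE_CTX) {	/* context slot: nothing mapped */
		buf->type = SQE_EMPTY;
		return (ntc + 1) % size;
	}
	if (buf->type != SQE_SKB || !done)
		return ntc;		/* mid-packet, or EOP not written back */

	while (ntc != buf->rs_idx) {	/* walk head..EOP, "unmapping" each frag */
		ring[ntc].type = SQE_EMPTY;
		ntc = (ntc + 1) % size;
	}
	ring[ntc].type = SQE_EMPTY;

	return (ntc + 1) % size;
}

int main(void)
{
	struct sqe ring[8] = {
		[0] = { .type = SQE_CTX },		/* e.g. a TSO context descriptor */
		[1] = { .type = SQE_SKB, .rs_idx = 3 },	/* 3-buffer packet, EOP at 3 */
		[2] = { .type = SQE_FRAG },
		[3] = { .type = SQE_FRAG },
	};
	unsigned int ntc = 0;

	ntc = clean_one(ring, 8, ntc, 1);	/* skips the context slot -> 1 */
	ntc = clean_one(ring, 8, ntc, 1);	/* cleans the whole packet -> 4 */
	printf("next_to_clean = %u\n", ntc);	/* prints 4 */
	return 0;
}

The ordering EMPTY < CTX < everything-with-a-mapping is what lets the single type <= CTX comparison replace both the old ctx_entry flag and the invalid-compl_tag sentinel. End of note.]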
struct device *dev; void __iomem *tail; @@ -831,7 +791,7 @@ struct idpf_compl_queue { u32 next_to_use; u32 next_to_clean; - u32 num_completions; + aligned_u64 num_completions; __cacheline_group_end_aligned(read_write); __cacheline_group_begin_aligned(cold); @@ -963,7 +923,7 @@ struct idpf_txq_group { struct idpf_compl_queue *complq; - u32 num_completions_pending; + aligned_u64 num_completions_pending; }; static inline int idpf_q_vector_to_mem(const struct idpf_q_vector *q_vector) @@ -1033,6 +993,25 @@ static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc, idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size); } +/** + * idpf_vport_intr_set_wb_on_itr - enable descriptor writeback on disabled interrupts + * @q_vector: pointer to queue vector struct + */ +static inline void idpf_vport_intr_set_wb_on_itr(struct idpf_q_vector *q_vector) +{ + struct idpf_intr_reg *reg; + + if (q_vector->wb_on_itr) + return; + + q_vector->wb_on_itr = true; + reg = &q_vector->intr_reg; + + writel(reg->dyn_ctl_wb_on_itr_m | reg->dyn_ctl_intena_msk_m | + (IDPF_NO_ITR_UPDATE_IDX << reg->dyn_ctl_itridx_s), + reg->dyn_ctl); +} + int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); void idpf_vport_init_num_qs(struct idpf_vport *vport, struct virtchnl2_create_vport *vport_msg); @@ -1064,7 +1043,6 @@ void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, struct idpf_tx_buf *first, u16 ring_idx); unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, struct sk_buff *skb); -int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q); @@ -1073,4 +1051,12 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); +static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, + u32 needed) +{ + return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + IDPF_DESC_UNUSED(tx_q), + needed, needed); +} + #endif /* !_IDPF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c index 629cb5cb7c9f..99b8dbaf4225 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c @@ -97,7 +97,9 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport) intr->dyn_ctl = idpf_get_reg_addr(adapter, reg_vals[vec_id].dyn_ctl_reg); intr->dyn_ctl_intena_m = VF_INT_DYN_CTLN_INTENA_M; + intr->dyn_ctl_intena_msk_m = VF_INT_DYN_CTLN_INTENA_MSK_M; intr->dyn_ctl_itridx_s = VF_INT_DYN_CTLN_ITR_INDX_S; + intr->dyn_ctl_wb_on_itr_m = VF_INT_DYN_CTLN_WB_ON_ITR_M; spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, IDPF_VF_ITR_IDX_SPACING); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 06b9970dffad..ca6ccbc13954 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2387,15 +2387,11 @@ static int igb_get_ts_info(struct net_device *dev, if (adapter->ptp_clock) info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; switch (adapter->hw.mac.type) { case e1000_82575: info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + SOF_TIMESTAMPING_TX_SOFTWARE; return 0; case 
e1000_82576: case e1000_82580: @@ -2405,8 +2401,6 @@ static int igb_get_ts_info(struct net_device *dev, case e1000_i211: info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 11be39f435f3..1ef4cb871452 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -33,6 +33,7 @@ #include <linux/bpf_trace.h> #include <linux/pm_runtime.h> #include <linux/etherdevice.h> +#include <linux/lockdep.h> #ifdef CONFIG_IGB_DCA #include <linux/dca.h> #endif @@ -2914,8 +2915,11 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +/* This function assumes __netif_tx_lock is held by the caller. */ static void igb_xdp_ring_update_tail(struct igb_ring *ring) { + lockdep_assert_held(&txring_txq(ring)->_xmit_lock); + /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. */ @@ -3000,11 +3004,11 @@ static int igb_xdp_xmit(struct net_device *dev, int n, nxmit++; } - __netif_tx_unlock(nq); - if (unlikely(flags & XDP_XMIT_FLUSH)) igb_xdp_ring_update_tail(tx_ring); + __netif_tx_unlock(nq); + return nxmit; } @@ -4808,6 +4812,7 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, #if (PAGE_SIZE < 8192) if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + IGB_2K_TOO_SMALL_WITH_PADDING || rd32(E1000_RCTL) & E1000_RCTL_SBP) set_ring_uses_large_buffer(rx_ring); #endif @@ -6959,10 +6964,20 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) static void igb_tsync_interrupt(struct igb_adapter *adapter) { + const u32 mask = (TSINTR_SYS_WRAP | E1000_TSICR_TXTS | + TSINTR_TT0 | TSINTR_TT1 | + TSINTR_AUTT0 | TSINTR_AUTT1); struct e1000_hw *hw = &adapter->hw; u32 tsicr = rd32(E1000_TSICR); struct ptp_clock_event event; + if (hw->mac.type == e1000_82580) { + /* 82580 has a hardware bug that requires an explicit + * write to clear the TimeSync interrupt cause. 
+ */ + wr32(E1000_TSICR, tsicr & mask); + } + if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) @@ -8853,12 +8868,14 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring, static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) { + unsigned int total_bytes = 0, total_packets = 0; struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *rx_ring = q_vector->rx.ring; - struct sk_buff *skb = rx_ring->skb; - unsigned int total_bytes = 0, total_packets = 0; u16 cleaned_count = igb_desc_unused(rx_ring); + struct sk_buff *skb = rx_ring->skb; + int cpu = smp_processor_id(); unsigned int xdp_xmit = 0; + struct netdev_queue *nq; struct xdp_buff xdp; u32 frame_sz = 0; int rx_buf_pgcnt; @@ -8986,7 +9003,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) if (xdp_xmit & IGB_XDP_TX) { struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); igb_xdp_ring_update_tail(tx_ring); + __netif_tx_unlock(nq); } u64_stats_update_begin(&rx_ring->rx_syncp); diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h index 7b83678ba83a..6ad35a00a287 100644 --- a/drivers/net/ethernet/intel/igbvf/igbvf.h +++ b/drivers/net/ethernet/intel/igbvf/igbvf.h @@ -282,7 +282,6 @@ enum igbvf_state_t { extern char igbvf_driver_name[]; -void igbvf_check_options(struct igbvf_adapter *); void igbvf_set_ethtool_ops(struct net_device *); int igbvf_up(struct igbvf_adapter *); diff --git a/drivers/net/ethernet/intel/igbvf/mbx.h b/drivers/net/ethernet/intel/igbvf/mbx.h index e5b31818d565..7637d21445bf 100644 --- a/drivers/net/ethernet/intel/igbvf/mbx.h +++ b/drivers/net/ethernet/intel/igbvf/mbx.h @@ -49,7 +49,6 @@ #define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */ -void e1000_init_mbx_ops_generic(struct e1000_hw *hw); s32 e1000_init_mbx_params_vf(struct e1000_hw *); #endif /* _E1000_MBX_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index c38b4d0f00ce..eac0f966e0e4 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -259,6 +259,10 @@ struct igc_adapter { */ spinlock_t qbv_tx_lock; + bool strict_priority_enable; + u8 num_tc; + u16 queue_per_tc[IGC_MAX_TX_QUEUES]; + /* OS defined structs */ struct pci_dev *pdev; /* lock for statistics */ @@ -382,9 +386,11 @@ extern char igc_driver_name[]; #define IGC_FLAG_RX_LEGACY BIT(16) #define IGC_FLAG_TSN_QBV_ENABLED BIT(17) #define IGC_FLAG_TSN_QAV_ENABLED BIT(18) +#define IGC_FLAG_TSN_LEGACY_ENABLED BIT(19) -#define IGC_FLAG_TSN_ANY_ENABLED \ - (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED) +#define IGC_FLAG_TSN_ANY_ENABLED \ + (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED | \ + IGC_FLAG_TSN_LEGACY_ENABLED) #define IGC_FLAG_RSS_FIELD_IPV4_UDP BIT(6) #define IGC_FLAG_RSS_FIELD_IPV6_UDP BIT(7) @@ -681,6 +687,7 @@ enum igc_ring_flags_t { IGC_RING_FLAG_TX_DETECT_HANG, IGC_RING_FLAG_AF_XDP_ZC, IGC_RING_FLAG_TX_HWTSTAMP, + IGC_RING_FLAG_RX_ALLOC_FAILED, }; #define ring_uses_large_buffer(ring) \ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 5f92b3c7c3d4..8e449904aa7d 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -4,6 +4,8 @@ #ifndef _IGC_DEFINES_H_ #define _IGC_DEFINES_H_ +#include <linux/bitfield.h> + /* Number of Transmit and Receive Descriptors must be a multiple of 8 
*/ #define REQ_TX_DESCRIPTOR_MULTIPLE 8 #define REQ_RX_DESCRIPTOR_MULTIPLE 8 @@ -176,7 +178,6 @@ /* PHY GPY 211 registers */ #define STANDARD_AN_REG_MASK 0x0007 /* MMD */ -#define ANEG_MULTIGBT_AN_CTRL 0x0020 /* MULTI GBT AN Control Register */ #define MMD_DEVADDR_SHIFT 16 /* Shift MMD to higher bits */ #define CR_2500T_FD_CAPS 0x0080 /* Advertise 2500T FD capability */ @@ -404,6 +405,12 @@ #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ +/* Retry Buffer Control */ +#define IGC_RETX_CTL 0x041C +#define IGC_RETX_CTL_WATERMARK_MASK 0xF +#define IGC_RETX_CTL_QBVFULLTH_SHIFT 8 /* QBV Retry Buffer Full Threshold */ +#define IGC_RETX_CTL_QBVFULLEN 0x1000 /* Enable QBV Retry Buffer Full Threshold */ + /* Transmit Scheduling Latency */ /* Latency between transmission scheduling (LaunchTime) and the time * the packet is transmitted to the network in nanosecond. @@ -547,6 +554,15 @@ #define IGC_MAX_SR_QUEUES 2 +#define IGC_TXARB_TXQ_PRIO_0_MASK GENMASK(1, 0) +#define IGC_TXARB_TXQ_PRIO_1_MASK GENMASK(3, 2) +#define IGC_TXARB_TXQ_PRIO_2_MASK GENMASK(5, 4) +#define IGC_TXARB_TXQ_PRIO_3_MASK GENMASK(7, 6) +#define IGC_TXARB_TXQ_PRIO_0(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_0_MASK, (x)) +#define IGC_TXARB_TXQ_PRIO_1(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_1_MASK, (x)) +#define IGC_TXARB_TXQ_PRIO_2(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_2_MASK, (x)) +#define IGC_TXARB_TXQ_PRIO_3(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_3_MASK, (x)) + /* Receive Checksum Control */ #define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ #define IGC_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ @@ -635,6 +651,16 @@ #define IGC_MDIC_READY 0x10000000 #define IGC_MDIC_ERROR 0x40000000 +/* EEE Link Ability */ +#define IGC_EEE_2500BT_MASK BIT(0) +#define IGC_EEE_1000BT_MASK BIT(2) +#define IGC_EEE_100BT_MASK BIT(1) + +/* EEE Link-Partner Ability */ +#define IGC_LP_EEE_2500BT_MASK BIT(0) +#define IGC_LP_EEE_1000BT_MASK BIT(2) +#define IGC_LP_EEE_100BT_MASK BIT(1) + #define IGC_N0_QUEUE -1 #define IGC_MAX_MAC_HDR_LEN 127 diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 3d3ef4e1547c..5b0c6f433767 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1540,6 +1540,10 @@ static int igc_ethtool_set_channels(struct net_device *netdev, if (ch->other_count != NON_Q_VECTORS) return -EINVAL; + /* Do not allow channel reconfiguration when mqprio is enabled */ + if (adapter->strict_priority_enable) + return -EINVAL; + /* Verify the number of channels doesn't exceed hw limits */ max_combined = igc_get_max_rss_queues(adapter); if (count > max_combined) @@ -1565,15 +1569,11 @@ static int igc_ethtool_get_ts_info(struct net_device *dev, if (adapter->ptp_clock) info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; switch (adapter->hw.mac.type) { case igc_i225: info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -1627,8 +1627,11 @@ static int igc_ethtool_get_eee(struct net_device *netdev, { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; - u32 eeer; + struct igc_phy_info *phy = &hw->phy; + u16 eee_advert, eee_lp_advert; + u32 eeer, ret_val; + /* EEE supported */ linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, 
edata->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, @@ -1636,6 +1639,74 @@ static int igc_ethtool_get_eee(struct net_device *netdev, linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, edata->supported); + /* EEE Advertisement 1 - reg 7.60 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_AB1, + &eee_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.60 register\n"); + return -EINVAL; + } + + if (eee_advert & IGC_EEE_1000BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + edata->advertised); + + if (eee_advert & IGC_EEE_100BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + edata->advertised); + + /* EEE Advertisement 2 - reg 7.62 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_AB2, + &eee_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.62 register\n"); + return -EINVAL; + } + + if (eee_advert & IGC_EEE_2500BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + edata->advertised); + + /* EEE Link-Partner Ability 1 - reg 7.61 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_LP_AB1, + &eee_lp_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.61 register\n"); + return -EINVAL; + } + + if (eee_lp_advert & IGC_LP_EEE_1000BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + edata->lp_advertised); + + if (eee_lp_advert & IGC_LP_EEE_100BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + edata->lp_advertised); + + /* EEE Link-Partner Ability 2 - reg 7.63 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_LP_AB2, + &eee_lp_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.63 register\n"); + return -EINVAL; + } + + if (eee_lp_advert & IGC_LP_EEE_2500BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + edata->lp_advertised); + eeer = rd32(IGC_EEER); /* EEE status on negotiated link */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cb5c7b09e8a0..6e70bca15db1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2191,6 +2191,7 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_failed++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } @@ -2207,6 +2208,7 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, __free_page(page); rx_ring->rx_stats.alloc_failed++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } @@ -2658,6 +2660,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) if (!skb) { rx_ring->rx_stats.alloc_failed++; rx_buffer->pagecnt_bias++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); break; } @@ -2738,6 +2741,7 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, skb = igc_construct_skb_zc(ring, xdp); if (!skb) { ring->rx_stats.alloc_failed++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags); return; } @@ -5807,11 +5811,29 @@ no_wait: if (adapter->flags & IGC_FLAG_HAS_MSIX) { u32 eics = 0; - for (i = 0; i < adapter->num_q_vectors; i++) - eics |= adapter->q_vector[i]->eims_value; - wr32(IGC_EICS, eics); + for (i = 0; i < 
adapter->num_q_vectors; i++) { + struct igc_q_vector *q_vector = adapter->q_vector[i]; + struct igc_ring *rx_ring; + + if (!q_vector->rx.ring) + continue; + + rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; + + if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + eics |= q_vector->eims_value; + clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + } + } + if (eics) + wr32(IGC_EICS, eics); } else { - wr32(IGC_ICS, IGC_ICS_RXDMT0); + struct igc_ring *rx_ring = adapter->rx_ring[0]; + + if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + wr32(IGC_ICS, IGC_ICS_RXDMT0); + } } igc_ptp_tx_hang(adapter); @@ -6306,21 +6328,6 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - break; - case TAPRIO_CMD_DESTROY: - return igc_tsn_clear_schedule(adapter); - case TAPRIO_CMD_STATS: - igc_taprio_stats(adapter->netdev, &qopt->stats); - return 0; - case TAPRIO_CMD_QUEUE_STATS: - igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); - return 0; - default: - return -EOPNOTSUPP; - } - if (qopt->base_time < 0) return -ERANGE; @@ -6330,12 +6337,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (!validate_schedule(adapter, qopt)) return -EINVAL; + igc_ptp_read(adapter, &now); + + if (igc_tsn_is_taprio_activated_by_user(adapter) && + is_base_time_past(qopt->base_time, &now)) + adapter->qbv_config_change_errors++; + adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; adapter->taprio_offload_enable = true; - igc_ptp_read(adapter, &now); - for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6429,7 +6440,23 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, if (hw->mac.type != igc_i225) return -EOPNOTSUPP; - err = igc_save_qbv_schedule(adapter, qopt); + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + err = igc_save_qbv_schedule(adapter, qopt); + break; + case TAPRIO_CMD_DESTROY: + err = igc_tsn_clear_schedule(adapter); + break; + case TAPRIO_CMD_STATS: + igc_taprio_stats(adapter->netdev, &qopt->stats); + return 0; + case TAPRIO_CMD_QUEUE_STATS: + igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); + return 0; + default: + return -EOPNOTSUPP; + } + if (err) return err; @@ -6510,6 +6537,13 @@ static int igc_tc_query_caps(struct igc_adapter *adapter, struct igc_hw *hw = &adapter->hw; switch (base->type) { + case TC_SETUP_QDISC_MQPRIO: { + struct tc_mqprio_caps *caps = base->caps; + + caps->validate_queue_counts = true; + + return 0; + } case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_caps *caps = base->caps; @@ -6527,6 +6561,65 @@ static int igc_tc_query_caps(struct igc_adapter *adapter, } } +static void igc_save_mqprio_params(struct igc_adapter *adapter, u8 num_tc, + u16 *offset) +{ + int i; + + adapter->strict_priority_enable = true; + adapter->num_tc = num_tc; + + for (i = 0; i < num_tc; i++) + adapter->queue_per_tc[i] = offset[i]; +} + +static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct igc_hw *hw = &adapter->hw; + int i; + + if (hw->mac.type != igc_i225) + return -EOPNOTSUPP; + + if (!mqprio->qopt.num_tc) { + adapter->strict_priority_enable = false; + goto apply; + } + + /* There are as many TCs as Tx queues. 
*/ + if (mqprio->qopt.num_tc != adapter->num_tx_queues) { + NL_SET_ERR_MSG_FMT_MOD(mqprio->extack, + "Only %d traffic classes supported", + adapter->num_tx_queues); + return -EOPNOTSUPP; + } + + /* Only one queue per TC is supported. */ + for (i = 0; i < mqprio->qopt.num_tc; i++) { + if (mqprio->qopt.count[i] != 1) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "Only one queue per TC supported"); + return -EOPNOTSUPP; + } + } + + /* Preemption is not supported yet. */ + if (mqprio->preemptible_tcs) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "Preemption is not supported yet"); + return -EOPNOTSUPP; + } + + igc_save_mqprio_params(adapter, mqprio->qopt.num_tc, + mqprio->qopt.offset); + + mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS; + +apply: + return igc_tsn_offload_apply(adapter); +} + static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -6546,6 +6639,9 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, case TC_SETUP_QDISC_CBS: return igc_tsn_enable_cbs(adapter, type_data); + case TC_SETUP_QDISC_MQPRIO: + return igc_tsn_enable_mqprio(adapter, type_data); + default: return -EOPNOTSUPP; } @@ -7408,6 +7504,7 @@ static void igc_io_resume(struct pci_dev *pdev) rtnl_lock(); if (netif_running(netdev)) { if (igc_open(netdev)) { + rtnl_unlock(); netdev_err(netdev, "igc_open failed after reset\n"); return; } diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 861f37076861..2801e5f24df9 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -240,7 +240,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) /* Read the MULTI GBT AN Control Register - reg 7.32 */ ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | - ANEG_MULTIGBT_AN_CTRL, + IGC_ANEG_MULTIGBT_AN_CTRL, &aneg_multigbt_an_ctrl); if (ret_val) @@ -380,7 +380,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) ret_val = phy->ops.write_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | - ANEG_MULTIGBT_AN_CTRL, + IGC_ANEG_MULTIGBT_AN_CTRL, aneg_multigbt_an_ctrl); return ret_val; diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index e5b893fc5b66..12ddc5793651 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -238,6 +238,8 @@ #define IGC_TQAVCC(_n) (0x3004 + ((_n) * 0x40)) #define IGC_TQAVHC(_n) (0x300C + ((_n) * 0x40)) +#define IGC_TXARB 0x3354 /* Tx Arbitration Control TxARB - RW */ + /* System Time Registers */ #define IGC_SYSTIML 0x0B600 /* System time register Low - RO */ #define IGC_SYSTIMH 0x0B604 /* System time register High - RO */ @@ -308,6 +310,16 @@ #define IGC_IPCNFG 0x0E38 /* Internal PHY Configuration */ #define IGC_EEE_SU 0x0E34 /* EEE Setup */ +/* MULTI GBT AN Control Register - reg. 
7.32 */ +#define IGC_ANEG_MULTIGBT_AN_CTRL 0x0020 + +/* EEE ANeg Advertisement Register - reg 7.60 and reg 7.62 */ +#define IGC_ANEG_EEE_AB1 0x003c +#define IGC_ANEG_EEE_AB2 0x003e +/* EEE ANeg Link-Partner Advertisement Register - reg 7.61 and reg 7.63 */ +#define IGC_ANEG_EEE_LP_AB1 0x003d +#define IGC_ANEG_EEE_LP_AB2 0x003f + /* LTR registers */ #define IGC_LTRC 0x01A0 /* Latency Tolerance Reporting Control */ #define IGC_LTRMINV 0x5BB0 /* LTR Minimum Value */ diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 22cefb1eeedf..1e44374ca1ff 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -46,15 +46,25 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) if (is_cbs_enabled(adapter)) new_flags |= IGC_FLAG_TSN_QAV_ENABLED; + if (adapter->strict_priority_enable) + new_flags |= IGC_FLAG_TSN_LEGACY_ENABLED; + return new_flags; } +static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN); +} + void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u16 txoffset; - if (!is_any_launchtime(adapter)) + if (!igc_tsn_is_tx_mode_in_tsn(adapter)) return; switch (adapter->link_speed) { @@ -78,11 +88,49 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) wr32(IGC_GTXOFFSET, txoffset); } +static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl; + + retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK; + wr32(IGC_RETX_CTL, retxctl); +} + +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + adapter->taprio_offload_enable; +} + +static void igc_tsn_tx_arb(struct igc_adapter *adapter, u16 *queue_per_tc) +{ + struct igc_hw *hw = &adapter->hw; + u32 txarb; + + txarb = rd32(IGC_TXARB); + + txarb &= ~(IGC_TXARB_TXQ_PRIO_0_MASK | + IGC_TXARB_TXQ_PRIO_1_MASK | + IGC_TXARB_TXQ_PRIO_2_MASK | + IGC_TXARB_TXQ_PRIO_3_MASK); + + txarb |= IGC_TXARB_TXQ_PRIO_0(queue_per_tc[3]); + txarb |= IGC_TXARB_TXQ_PRIO_1(queue_per_tc[2]); + txarb |= IGC_TXARB_TXQ_PRIO_2(queue_per_tc[1]); + txarb |= IGC_TXARB_TXQ_PRIO_3(queue_per_tc[0]); + + wr32(IGC_TXARB, txarb); +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. */ static int igc_tsn_disable_offload(struct igc_adapter *adapter) { + u16 queue_per_tc[4] = { 3, 2, 1, 0 }; struct igc_hw *hw = &adapter->hw; u32 tqavctrl; int i; @@ -91,6 +139,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + if (igc_is_device_id_i226(hw)) + igc_tsn_restore_retx_default(adapter); + tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); @@ -106,11 +157,39 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_QBVCYCLET_S, 0); wr32(IGC_QBVCYCLET, NSEC_PER_SEC); + /* Reset mqprio TC configuration. */ + netdev_reset_tc(adapter->netdev); + + /* Restore the default Tx arbitration: Priority 0 has the highest + * priority and is assigned to queue 0 and so on and so forth. 
+ */ + igc_tsn_tx_arb(adapter, queue_per_tc); + adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED; + adapter->flags &= ~IGC_FLAG_TSN_LEGACY_ENABLED; return 0; } +/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes + * to 88 Bytes by setting RETX_CTL register using the recommendation from: + * a) Ethernet Controller I225/I226 Specification Update Rev 2.1 + * Item 9: TSN: Packet Transmission Might Cross the Qbv Window + * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control + */ +static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl, watermark; + + retxctl = rd32(IGC_RETX_CTL); + watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK; + /* Set QBVFULLTH value using watermark and set QBVFULLEN */ + retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) | + IGC_RETX_CTL_QBVFULLEN; + wr32(IGC_RETX_CTL, retxctl); +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -123,6 +202,43 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + if (igc_is_device_id_i226(hw)) + igc_tsn_set_retx_qbvfullthreshold(adapter); + + if (adapter->strict_priority_enable) { + int err; + + err = netdev_set_num_tc(adapter->netdev, adapter->num_tc); + if (err) + return err; + + for (i = 0; i < adapter->num_tc; i++) { + err = netdev_set_tc_queue(adapter->netdev, i, 1, + adapter->queue_per_tc[i]); + if (err) + return err; + } + + /* In case the card is configured with less than four queues. */ + for (; i < IGC_MAX_TX_QUEUES; i++) + adapter->queue_per_tc[i] = i; + + /* Configure queue priorities according to the user provided + * mapping. + */ + igc_tsn_tx_arb(adapter, adapter->queue_per_tc); + + /* Enable legacy TSN mode which will do strict priority without + * any other TSN features. + */ + tqavctrl = rd32(IGC_TQAVCTRL); + tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN; + tqavctrl &= ~IGC_TQAVCTRL_ENHANCED_QAV; + wr32(IGC_TQAVCTRL, tqavctrl); + + return 0; + } + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; u32 txqctl = 0; @@ -262,14 +378,6 @@ skip_cbs: s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); - - /* Increase the counter if scheduling into the past while - * Gate Control List (GCL) is running. - */ - if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && - (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && - (adapter->qbv_count > 1)) - adapter->qbv_config_change_errors++; } else { if (igc_is_device_id_i226(hw)) { ktime_t adjust_time, expires_time; @@ -331,15 +439,22 @@ int igc_tsn_reset(struct igc_adapter *adapter) return err; } -int igc_tsn_offload_apply(struct igc_adapter *adapter) +static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter) { - struct igc_hw *hw = &adapter->hw; + bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) & + IGC_FLAG_TSN_ANY_ENABLED); + + return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) || + (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter)); +} - /* Per I225/6 HW Design Section 7.5.2.1, transmit mode - * cannot be changed dynamically. Require reset the adapter. +int igc_tsn_offload_apply(struct igc_adapter *adapter) +{ + /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change + * from legacy->tsn or tsn->legacy, then reset adapter is needed. 
*/ if (netif_running(adapter->netdev) && - (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { + igc_tsn_will_tx_mode_change(adapter)) { schedule_work(&adapter->reset_task); return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index b53e6af560b7..98ec845a86bf 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -7,5 +7,6 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter); int igc_tsn_reset(struct igc_adapter *adapter); void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index e85f7d2e8810..f2709b10c2e5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -317,7 +317,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) int max_frame = adapter->netdev->mtu + ETH_HLEN + ETH_FCS_LEN; #ifdef IXGBE_FCOE - if (adapter->netdev->features & NETIF_F_FCOE_MTU) + if (adapter->netdev->fcoe_mtu) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 4cac76254966..9482e0cca8b7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3196,16 +3196,12 @@ static int ixgbe_get_ts_info(struct net_device *dev, info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (adapter->ptp_clock) info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; info->tx_types = BIT(HWTSTAMP_TX_OFF) | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 18d63c8c2ff4..955dced844a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -858,7 +858,7 @@ int ixgbe_fcoe_enable(struct net_device *netdev) /* enable FCoE and notify stack */ adapter->flags |= IXGBE_FLAG_FCOE_ENABLED; - netdev->features |= NETIF_F_FCOE_MTU; + netdev->fcoe_mtu = true; netdev_features_change(netdev); /* release existing queues and reallocate them */ @@ -898,7 +898,7 @@ int ixgbe_fcoe_disable(struct net_device *netdev) /* disable FCoE and notify stack */ adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; - netdev->features &= ~NETIF_F_FCOE_MTU; + netdev->fcoe_mtu = false; netdev_features_change(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 0ee943db3dc9..16fa621ce0ff 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -981,7 +981,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state); #ifdef IXGBE_FCOE - if (adapter->netdev->features & NETIF_F_FCOE_MTU) { + if (adapter->netdev->fcoe_mtu) { struct ixgbe_ring_feature *f; f = &adapter->ring_feature[RING_F_FCOE]; if ((rxr_idx >= f->offset) && diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 8057cef61f39..8b8404d8c946 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5079,7 +5079,7 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) netif_set_tso_max_size(adapter->netdev, 32768); #ifdef IXGBE_FCOE - if (adapter->netdev->features & NETIF_F_FCOE_MTU) + if (adapter->netdev->fcoe_mtu) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); #endif @@ -5136,8 +5136,7 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) #ifdef IXGBE_FCOE /* FCoE traffic class uses FCOE jumbo frames */ - if ((dev->features & NETIF_F_FCOE_MTU) && - (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && + if (dev->fcoe_mtu && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE && (pb == ixgbe_fcoe_get_tc(adapter))) tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; #endif @@ -5197,8 +5196,7 @@ static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) #ifdef IXGBE_FCOE /* FCoE traffic class uses FCOE jumbo frames */ - if ((dev->features & NETIF_F_FCOE_MTU) && - (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && + if (dev->fcoe_mtu && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE && (pb == netdev_get_prio_tc_map(dev, adapter->fcoe.up))) tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; #endif @@ -11096,8 +11094,7 @@ skip_sriov: NETIF_F_FCOE_CRC; netdev->vlan_features |= NETIF_F_FSO | - NETIF_F_FCOE_CRC | - NETIF_F_FCOE_MTU; + NETIF_F_FCOE_CRC; } #endif /* IXGBE_FCOE */ if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index fcfd0a075eee..e71715f5da22 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -495,7 +495,7 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf int err = 0; #ifdef CONFIG_FCOE - if (dev->features & NETIF_F_FCOE_MTU) + if (dev->fcoe_mtu) pf_max_frame = max_t(int, pf_max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); @@ -857,7 +857,7 @@ static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf) int pf_max_frame = dev->mtu + ETH_HLEN; #if IS_ENABLED(CONFIG_FCOE) - if (dev->features & NETIF_F_FCOE_MTU) + if (dev->fcoe_mtu) pf_max_frame = max_t(int, pf_max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); #endif /* CONFIG_FCOE */ diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index b06e24562973..d8be0e4dcb07 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -946,15 +946,13 @@ jme_udpsum(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IP)) return csum; skb_set_network_header(skb, ETH_HLEN); - if ((ip_hdr(skb)->protocol != IPPROTO_UDP) || - (skb->len < (ETH_HLEN + - (ip_hdr(skb)->ihl << 2) + - sizeof(struct udphdr)))) { + + if (ip_hdr(skb)->protocol != IPPROTO_UDP || + skb->len < (ETH_HLEN + ip_hdrlen(skb) + sizeof(struct udphdr))) { skb_reset_network_header(skb); return csum; } - skb_set_transport_header(skb, - ETH_HLEN + (ip_hdr(skb)->ihl << 2)); + skb_set_transport_header(skb, ETH_HLEN + ip_hdrlen(skb)); csum = udp_hdr(skb)->check; skb_reset_transport_header(skb); skb_reset_network_header(skb); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 9e6984815386..3c289bfe0a09 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -95,7 +95,6 @@ struct ltq_etop_priv { struct mii_bus *mii_bus; struct ltq_etop_chan ch[MAX_DMA_CHAN]; - int tx_free[MAX_DMA_CHAN >> 1]; int tx_burst_len; int rx_burst_len; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index f35ae2c88091..9e80899546d9 100644 --- 
a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2802,7 +2802,7 @@ port_err: static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) { struct mv643xx_eth_shared_platform_data *pd; - struct device_node *pnp, *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node; int ret; /* bail out if not registered from DT */ @@ -2816,10 +2816,9 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) mv643xx_eth_property(np, "tx-checksum-limit", pd->tx_csum_limit); - for_each_available_child_of_node(np, pnp) { + for_each_available_child_of_node_scoped(np, pnp) { ret = mv643xx_eth_shared_of_add_port(pdev, pnp); if (ret) { - of_node_put(pnp); mv643xx_eth_shared_of_remove(); return ret; } diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 9190eff6c0bb..e1d003fdbc2e 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -104,7 +104,7 @@ static int orion_mdio_wait_ready(const struct orion_mdio_ops *ops, return 0; } else { /* wait_event_timeout does not guarantee a delay of at - * least one whole jiffie, so timeout must be no less + * least one whole jiffy, so timeout must be no less * than two. */ timeout = max(usecs_to_jiffies(MVMDIO_SMI_TIMEOUT), 2); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 41894834fb53..d72b2d5f96db 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1781,7 +1781,7 @@ static int mvneta_txq_sent_desc_proc(struct mvneta_port *pp, } /* Set TXQ descriptors fields relevant for CSUM calculation */ -static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, +static u32 mvneta_txq_desc_csum(int l3_offs, __be16 l3_proto, int ip_hdr_len, int l4_proto) { u32 command; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index e809f91c08fb..9e02e4367bec 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -1088,7 +1088,7 @@ struct mvpp2 { unsigned int max_port_rxqs; /* Workqueue to gather hardware statistics */ - char queue_name[30]; + char queue_name[31]; struct workqueue_struct *stats_queue; /* Debugfs root entry */ diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 40aeaa7bd739..1641791a2d5b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -1522,29 +1522,19 @@ static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx) return 0; } -int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *port_ctx) +int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 port_ctx) { u32 rss_ctx; - int ret, i; + int ret; ret = mvpp22_rss_context_create(port, &rss_ctx); if (ret) return ret; - /* Find the first available context number in the port, starting from 1. - * Context 0 on each port is reserved for the default context. 
- */ - for (i = 1; i < MVPP22_N_RSS_TABLES; i++) { - if (port->rss_ctx[i] < 0) - break; - } - - if (i == MVPP22_N_RSS_TABLES) + if (WARN_ON_ONCE(port->rss_ctx[port_ctx] >= 0)) return -EINVAL; - port->rss_ctx[i] = rss_ctx; - *port_ctx = i; - + port->rss_ctx[port_ctx] = rss_ctx; return 0; } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h index 663157dc8062..85c9c6e80678 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h @@ -264,7 +264,7 @@ int mvpp22_port_rss_init(struct mvpp2_port *port); int mvpp22_port_rss_enable(struct mvpp2_port *port); int mvpp22_port_rss_disable(struct mvpp2_port *port); -int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *rss_ctx); +int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 rss_ctx); int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 rss_ctx); int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 rss_ctx, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8c45ad983abc..3880dcc0418b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -953,13 +953,13 @@ static void mvpp2_bm_pool_update_fc(struct mvpp2_port *port, static void mvpp2_bm_pool_update_priv_fc(struct mvpp2 *priv, bool en) { struct mvpp2_port *port; - int i; + int i, j; for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; if (port->priv->percpu_pools) { - for (i = 0; i < port->nrxqs; i++) - mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[i], + for (j = 0; j < port->nrxqs; j++) + mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[j], port->tx_fc & en); } else { mvpp2_bm_pool_update_fc(port, port->pool_long, port->tx_fc & en); @@ -5268,8 +5268,6 @@ static int mvpp2_ethtool_get_ts_info(struct net_device *dev, info->phc_index = mvpp22_tai_ptp_clock_index(port->priv->tai); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -5696,40 +5694,82 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, return ret; } -static int mvpp2_ethtool_set_rxfh(struct net_device *dev, - struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +static bool mvpp2_ethtool_rxfh_okay(struct mvpp2_port *port, + const struct ethtool_rxfh_param *rxfh) { - struct mvpp2_port *port = netdev_priv(dev); - u32 *rss_context = &rxfh->rss_context; - int ret = 0; - if (!mvpp22_rss_is_supported(port)) - return -EOPNOTSUPP; + return false; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && rxfh->hfunc != ETH_RSS_HASH_CRC32) - return -EOPNOTSUPP; + return false; if (rxfh->key) + return false; + + return true; +} + +static int mvpp2_create_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct mvpp2_port *port = netdev_priv(dev); + int ret = 0; + + if (!mvpp2_ethtool_rxfh_okay(port, rxfh)) return -EOPNOTSUPP; - if (*rss_context && rxfh->rss_delete) - return mvpp22_port_rss_ctx_delete(port, *rss_context); + ctx->hfunc = ETH_RSS_HASH_CRC32; - if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - ret = mvpp22_port_rss_ctx_create(port, rss_context); - if (ret) - return ret; - } + ret = mvpp22_port_rss_ctx_create(port, rxfh->rss_context); + if (ret) + return ret; - if 
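/*
 * With the dedicated .create_rxfh_context/.modify_rxfh_context/
 * .remove_rxfh_context handlers in this hunk, the ethtool core owns context
 * ID allocation and tracking (bounded by rxfh_max_num_contexts below), so
 * the old first-free-slot scan in mvpp22_port_rss_ctx_create() is dropped
 * and the driver merely claims the slot it is handed. Assuming a current
 * ethtool binary, the flow is exercised from user space roughly like this
 * (device name hypothetical):
 *
 *      ethtool -X eth0 context new             create; the core picks the ID
 *      ethtool -X eth0 context 1 equal 4       modify its indirection table
 *      ethtool -X eth0 context 1 delete        remove
 */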
(rxfh->indir) - ret = mvpp22_port_rss_ctx_indir_set(port, *rss_context, + if (!rxfh->indir) + ret = mvpp22_port_rss_ctx_indir_get(port, rxfh->rss_context, + ethtool_rxfh_context_indir(ctx)); + else + ret = mvpp22_port_rss_ctx_indir_set(port, rxfh->rss_context, rxfh->indir); + return ret; +} + +static int mvpp2_modify_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct mvpp2_port *port = netdev_priv(dev); + int ret = 0; + if (!mvpp2_ethtool_rxfh_okay(port, rxfh)) + return -EOPNOTSUPP; + + if (rxfh->indir) + ret = mvpp22_port_rss_ctx_indir_set(port, rxfh->rss_context, + rxfh->indir); return ret; } +static int mvpp2_remove_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack) +{ + struct mvpp2_port *port = netdev_priv(dev); + + return mvpp22_port_rss_ctx_delete(port, rss_context); +} + +static int mvpp2_ethtool_set_rxfh(struct net_device *dev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + return mvpp2_modify_rxfh_context(dev, NULL, rxfh, extack); +} + /* Device ops */ static const struct net_device_ops mvpp2_netdev_ops = { @@ -5749,7 +5789,7 @@ static const struct net_device_ops mvpp2_netdev_ops = { }; static const struct ethtool_ops mvpp2_eth_tool_ops = { - .cap_rss_ctx_supported = true, + .rxfh_max_num_contexts = MVPP22_N_RSS_TABLES, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, .nway_reset = mvpp2_ethtool_nway_reset, @@ -5772,6 +5812,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = { .get_rxfh_indir_size = mvpp2_ethtool_get_rxfh_indir_size, .get_rxfh = mvpp2_ethtool_get_rxfh, .set_rxfh = mvpp2_ethtool_set_rxfh, + .create_rxfh_context = mvpp2_create_rxfh_context, + .modify_rxfh_context = mvpp2_modify_rxfh_context, + .remove_rxfh_context = mvpp2_remove_rxfh_context, }; /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that @@ -7417,8 +7460,6 @@ static int mvpp2_get_sram(struct platform_device *pdev, static int mvpp2_probe(struct platform_device *pdev) { - struct fwnode_handle *fwnode = pdev->dev.fwnode; - struct fwnode_handle *port_fwnode; struct mvpp2 *priv; struct resource *res; void __iomem *base; @@ -7591,7 +7632,7 @@ static int mvpp2_probe(struct platform_device *pdev) } /* Map DTS-active ports. 
Should be done before FIFO mvpp2_init */ - fwnode_for_each_available_child_node(fwnode, port_fwnode) { + device_for_each_child_node_scoped(&pdev->dev, port_fwnode) { if (!fwnode_property_read_u32(port_fwnode, "port-id", &i)) priv->port_map |= BIT(i); } @@ -7614,7 +7655,7 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_axi_clk; /* Initialize ports */ - fwnode_for_each_available_child_node(fwnode, port_fwnode) { + device_for_each_child_node_scoped(&pdev->dev, port_fwnode) { err = mvpp2_port_probe(pdev, port_fwnode, priv); if (err < 0) goto err_port_probe; @@ -7653,14 +7694,8 @@ static int mvpp2_probe(struct platform_device *pdev) return 0; err_port_probe: - fwnode_handle_put(port_fwnode); - - i = 0; - fwnode_for_each_available_child_node(fwnode, port_fwnode) { - if (priv->port_list[i]) - mvpp2_port_remove(priv->port_list[i]); - i++; - } + for (i = 0; i < priv->port_count; i++) + mvpp2_port_remove(priv->port_list[i]); err_axi_clk: clk_disable_unprepare(priv->axi_clk); err_mg_core_clk: @@ -7677,18 +7712,13 @@ err_pp_clk: static void mvpp2_remove(struct platform_device *pdev) { struct mvpp2 *priv = platform_get_drvdata(pdev); - struct fwnode_handle *fwnode = pdev->dev.fwnode; - int i = 0, poolnum = MVPP2_BM_POOLS_NUM; - struct fwnode_handle *port_fwnode; + int i, poolnum = MVPP2_BM_POOLS_NUM; mvpp2_dbgfs_cleanup(priv); - fwnode_for_each_available_child_node(fwnode, port_fwnode) { - if (priv->port_list[i]) { - mutex_destroy(&priv->port_list[i]->gather_stats_lock); - mvpp2_port_remove(priv->port_list[i]); - } - i++; + for (i = 0; i < priv->port_count; i++) { + mutex_destroy(&priv->port_list[i]->gather_stats_lock); + mvpp2_port_remove(priv->port_list[i]); } destroy_workqueue(priv->stats_queue); @@ -7711,7 +7741,7 @@ static void mvpp2_remove(struct platform_device *pdev) aggr_txq->descs_dma); } - if (is_acpi_node(port_fwnode)) + if (!dev_of_node(&pdev->dev)) return; clk_disable_unprepare(priv->axi_clk); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index ed2160cc5acb..6ea2f3071fe8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1856,8 +1856,9 @@ struct cpt_flt_eng_info_req { struct cpt_flt_eng_info_rsp { struct mbox_msghdr hdr; - u64 flt_eng_map[CPT_10K_AF_INT_VEC_RVU]; - u64 rcvrd_eng_map[CPT_10K_AF_INT_VEC_RVU]; +#define CPT_AF_MAX_FLT_INT_VECS 3 + u64 flt_eng_map[CPT_AF_MAX_FLT_INT_VECS]; + u64 rcvrd_eng_map[CPT_AF_MAX_FLT_INT_VECS]; u64 rsvd; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index ac7ee3f3598c..1a97fb9032fa 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2479,9 +2479,9 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, goto free_regions; } - mw->mbox_wq = alloc_workqueue(name, + mw->mbox_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - num); + num, name); if (!mw->mbox_wq) { err = -ENOMEM; goto unmap_regions; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 03ee93fd9e94..5016ba82e142 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -319,6 +319,7 @@ struct nix_mark_format { /* smq(flush) to tl1 cir/pir info */ struct nix_smq_tree_ctx { + u16 schq; u64 cir_off; u64 cir_val; u64 pir_off; @@ -328,8 +329,6 @@ struct 
nix_smq_tree_ctx { /* smq flush context */ struct nix_smq_flush_ctx { int smq; - u16 tl1_schq; - u16 tl2_schq; struct nix_smq_tree_ctx smq_tree_ctx[NIX_TXSCH_LVL_CNT]; }; @@ -400,6 +399,7 @@ struct hw_cap { bool nix_multiple_dwrr_mtu; /* Multiple DWRR_MTU to choose from */ bool npc_hash_extract; /* Hash extract enabled ? */ bool npc_exact_match_enabled; /* Exact match supported ? */ + bool cpt_rxc; /* Is CPT-RXC supported */ }; struct rvu_hwinfo { @@ -690,6 +690,35 @@ static inline bool is_cnf10ka_a0(struct rvu *rvu) return false; } +static inline bool is_cn10ka_a0(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A && + (pdev->revision & 0x0F) == 0x0) + return true; + return false; +} + +static inline bool is_cn10ka_a1(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A && + (pdev->revision & 0x0F) == 0x1) + return true; + return false; +} + +static inline bool is_cn10kb(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_B) + return true; + return false; +} + static inline bool is_rvu_npc_hash_extract_en(struct rvu *rvu) { u64 npc_const3; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 3e09d2285814..3c5bbaf12e59 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -19,6 +19,12 @@ /* Length of initial context fetch in 128 byte words */ #define CPT_CTX_ILEN 1ULL +/* Interrupt vector count of CPT RVU and RAS interrupts */ +#define CPT_10K_AF_RVU_RAS_INT_VEC_CNT 2 + +/* Default CPT_AF_RXC_CFG1:max_rxc_icb_cnt */ +#define CPT_DFLT_MAX_RXC_ICB_CNT 0xC0ULL + #define cpt_get_eng_sts(e_min, e_max, rsp, etype) \ ({ \ u64 free_sts = 0, busy_sts = 0; \ @@ -37,6 +43,41 @@ (_rsp)->free_sts_##etype = free_sts; \ }) +#define MAX_AE GENMASK_ULL(47, 32) +#define MAX_IE GENMASK_ULL(31, 16) +#define MAX_SE GENMASK_ULL(15, 0) + +static u16 cpt_max_engines_get(struct rvu *rvu) +{ + u16 max_ses, max_ies, max_aes; + u64 reg; + + reg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS1); + max_ses = FIELD_GET(MAX_SE, reg); + max_ies = FIELD_GET(MAX_IE, reg); + max_aes = FIELD_GET(MAX_AE, reg); + + return max_ses + max_ies + max_aes; +} + +/* Number of flt interrupt vectors are depends on number of engines that the + * chip has. Each flt vector represents 64 engines. 
+ */ +static int cpt_10k_flt_nvecs_get(struct rvu *rvu, u16 max_engs) +{ + int flt_vecs; + + flt_vecs = DIV_ROUND_UP(max_engs, 64); + + if (flt_vecs > CPT_10K_AF_INT_VEC_FLT_MAX) { + dev_warn_once(rvu->dev, "flt_vecs:%d exceeds the max vectors:%d\n", + flt_vecs, CPT_10K_AF_INT_VEC_FLT_MAX); + flt_vecs = CPT_10K_AF_INT_VEC_FLT_MAX; + } + + return flt_vecs; +} + static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) { struct rvu_block *block = ptr; @@ -150,17 +191,26 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off) { struct rvu *rvu = block->rvu; int blkaddr = block->addr; - int i; + int i, flt_vecs; + u16 max_engs; + u8 nr; + + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); /* Disable all CPT AF interrupts */ - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF); + for (i = CPT_10K_AF_INT_VEC_FLT0; i < flt_vecs; i++) { + nr = (max_engs > 64) ? 64 : max_engs; + max_engs -= nr; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), + INTR_MASK(nr)); + } rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); - for (i = 0; i < CPT_10K_AF_INT_VEC_CNT; i++) + /* CPT AF interrupt vectors are flt_int, rvu_int and ras_int. */ + for (i = 0; i < flt_vecs + CPT_10K_AF_RVU_RAS_INT_VEC_CNT; i++) if (rvu->irq_allocated[off + i]) { free_irq(pci_irq_vector(rvu->pdev, off + i), block); rvu->irq_allocated[off + i] = false; @@ -206,12 +256,18 @@ void rvu_cpt_unregister_interrupts(struct rvu *rvu) static int cpt_10k_register_interrupts(struct rvu_block *block, int off) { + int rvu_intr_vec, ras_intr_vec; struct rvu *rvu = block->rvu; int blkaddr = block->addr; irq_handler_t flt_fn; - int i, ret; + int i, ret, flt_vecs; + u16 max_engs; + u8 nr; + + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); - for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { + for (i = CPT_10K_AF_INT_VEC_FLT0; i < flt_vecs; i++) { sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i); switch (i) { @@ -229,20 +285,24 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off) flt_fn, &rvu->irq_name[(off + i) * NAME_SIZE]); if (ret) goto err; - if (i == CPT_10K_AF_INT_VEC_FLT2) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0xFFFF); - else - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); + + nr = (max_engs > 64) ? 
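/*
 * Each FLT vector covers at most 64 engines, so the vector count and the
 * per-vector enable mask now follow the engine count read from
 * CPT_AF_CONSTANTS1 instead of being hardcoded. A worked example, assuming
 * INTR_MASK(n) expands to an n-bit mask (engine count hypothetical):
 *
 *      max_engs = 144
 *      flt_vecs = DIV_ROUND_UP(144, 64) = 3
 *      vec 0: nr = 64  ->  INTR_MASK(64) = ~0ULL
 *      vec 1: nr = 64  ->  INTR_MASK(64) = ~0ULL
 *      vec 2: nr = 16  ->  INTR_MASK(16) = 0xFFFF
 *
 * which matches the 0xFFFF mask previously hardcoded for FLT2.
 */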
64 : max_engs; + max_engs -= nr; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), + INTR_MASK(nr)); } - ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU, + rvu_intr_vec = flt_vecs; + ras_intr_vec = rvu_intr_vec + 1; + + ret = rvu_cpt_do_register_interrupt(block, off + rvu_intr_vec, rvu_cpt_af_rvu_intr_handler, "CPTAF RVU"); if (ret) goto err; rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1S, 0x1); - ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RAS, + ret = rvu_cpt_do_register_interrupt(block, off + ras_intr_vec, rvu_cpt_af_ras_intr_handler, "CPTAF RAS"); if (ret) @@ -632,7 +692,9 @@ int rvu_mbox_handler_cpt_inline_ipsec_cfg(struct rvu *rvu, return ret; } -static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) +static bool validate_and_update_reg_offset(struct rvu *rvu, + struct cpt_rd_wr_reg_msg *req, + u64 *reg_offset) { u64 offset = req->reg_offset; int blkaddr, num_lfs, lf; @@ -663,6 +725,11 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) if (lf < 0) return false; + /* Translate local LF's offset to global CPT LF's offset to + * access LFX register. + */ + *reg_offset = (req->reg_offset & 0xFF000) + (lf << 3); + return true; } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { /* Registers that can be accessed from PF */ @@ -673,6 +740,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) case CPT_AF_BLK_RST: case CPT_AF_CONSTANTS1: case CPT_AF_CTX_FLUSH_TIMER: + case CPT_AF_RXC_CFG1: return true; } @@ -697,7 +765,7 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, struct cpt_rd_wr_reg_msg *rsp) { u64 offset = req->reg_offset; - int blkaddr, lf; + int blkaddr; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) @@ -708,18 +776,10 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, !is_cpt_vf(rvu, req->hdr.pcifunc)) return CPT_AF_ERR_ACCESS_DENIED; - if (!is_valid_offset(rvu, req)) + if (!validate_and_update_reg_offset(rvu, req, &offset)) return CPT_AF_ERR_ACCESS_DENIED; - /* Translate local LF used by VFs to global CPT LF */ - lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, - (offset & 0xFFF) >> 3); - - /* Translate local LF's offset to global CPT LF's offset */ - offset &= 0xFF000; - offset += lf << 3; - - rsp->reg_offset = offset; + rsp->reg_offset = req->reg_offset; rsp->ret_val = req->ret_val; rsp->is_write = req->is_write; @@ -733,6 +793,8 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) { + struct rvu_hwinfo *hw = rvu->hw; + if (is_rvu_otx2(rvu)) return; @@ -756,14 +818,16 @@ static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) rsp->ctx_err = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ERR); rsp->ctx_enc_id = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ENC_ID); rsp->ctx_flush_timer = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FLUSH_TIMER); + rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); + rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1)); + if (!hw->cap.cpt_rxc) + return; rsp->rxc_time = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME); rsp->rxc_time_cfg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG); rsp->rxc_active_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ACTIVE_STS); rsp->rxc_zombie_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ZOMBIE_STS); rsp->rxc_dfrg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_DFRG); - rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, 
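/*
 * validate_and_update_reg_offset() above keeps the register-block bits of
 * the VF-supplied offset and substitutes the global LF slot, while the
 * mailbox response now echoes the untranslated req->reg_offset back to the
 * guest. A worked example of the translation, with hypothetical values:
 *
 *      req->reg_offset = 0x10008       (local LF slot 1)
 *      lf = 12                         (global slot for this VF)
 *      *reg_offset = (0x10008 & 0xFF000) + (12 << 3)
 *                  = 0x10000 + 0x60 = 0x10060
 */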
CPT_AF_X2PX_LINK_CFG(0)); - rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1)); } static void get_eng_sts(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) @@ -922,13 +986,17 @@ int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r struct rvu_block *block; unsigned long flags; int blkaddr, vec; + int flt_vecs; + u16 max_engs; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) return blkaddr; block = &rvu->hw->block[blkaddr]; - for (vec = 0; vec < CPT_10K_AF_INT_VEC_RVU; vec++) { + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); + for (vec = 0; vec < flt_vecs; vec++) { spin_lock_irqsave(&rvu->cpt_intr_lock, flags); rsp->flt_eng_map[vec] = block->cpt_flt_eng_map[vec]; rsp->rcvrd_eng_map[vec] = block->cpt_rcvrd_eng_map[vec]; @@ -944,10 +1012,11 @@ int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req, prev; + struct rvu_hwinfo *hw = rvu->hw; int timeout = 2000; u64 reg; - if (is_rvu_otx2(rvu)) + if (!hw->cap.cpt_rxc) return; /* Set time limit to minimum values, so that rxc entries will be @@ -1220,10 +1289,30 @@ unlock: return 0; } +#define MAX_RXC_ICB_CNT GENMASK_ULL(40, 32) + int rvu_cpt_init(struct rvu *rvu) { + struct rvu_hwinfo *hw = rvu->hw; + u64 reg_val; + /* Retrieve CPT PF number */ rvu->cpt_pf_num = get_cpt_pf_num(rvu); + if (is_block_implemented(rvu->hw, BLKADDR_CPT0) && !is_rvu_otx2(rvu) && + !is_cn10kb(rvu)) + hw->cap.cpt_rxc = true; + + if (hw->cap.cpt_rxc && !is_cn10ka_a0(rvu) && !is_cn10ka_a1(rvu)) { + /* Set CPT_AF_RXC_CFG1:max_rxc_icb_cnt to 0xc0 to not effect + * inline inbound peak performance + */ + reg_val = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1); + reg_val &= ~MAX_RXC_ICB_CNT; + reg_val |= FIELD_PREP(MAX_RXC_ICB_CNT, + CPT_DFLT_MAX_RXC_ICB_CNT); + rvu_write64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1, reg_val); + } + spin_lock_init(&rvu->cpt_intr_lock); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 4a4ef5bd9e0b..87ba77e5026a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -838,10 +838,10 @@ RVU_DEBUG_FOPS(rsrc_status, rsrc_attach_status, NULL); static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) { + char cgx[10], lmac[10], chan[10]; struct rvu *rvu = filp->private; struct pci_dev *pdev = NULL; struct mac_ops *mac_ops; - char cgx[10], lmac[10]; struct rvu_pfvf *pfvf; int pf, domain, blkid; u8 cgx_id, lmac_id; @@ -852,7 +852,7 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) /* There can be no CGX devices at all */ if (!mac_ops) return 0; - seq_printf(filp, "PCI dev\t\tRVU PF Func\tNIX block\t%s\tLMAC\n", + seq_printf(filp, "PCI dev\t\tRVU PF Func\tNIX block\t%s\tLMAC\tCHAN\n", mac_ops->name); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { if (!is_pf_cgxmapped(rvu, pf)) @@ -876,8 +876,11 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) &lmac_id); sprintf(cgx, "%s%d", mac_ops->name, cgx_id); sprintf(lmac, "LMAC%d", lmac_id); - seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", - dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); + sprintf(chan, "%d", + rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0)); + seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\t%s\n", + dev_name(&pdev->dev), 
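/*
 * The rvu_cpt_init() hunk above only touches CPT_AF_RXC_CFG1 on RXC-capable
 * silicon other than CN10KA A0/A1, programming bits [40:32]
 * (max_rxc_icb_cnt) to CPT_DFLT_MAX_RXC_ICB_CNT so inline inbound peak
 * performance is not affected. Expanding the bitfield helpers for that
 * store, assuming the usual FIELD_PREP semantics:
 *
 *      MAX_RXC_ICB_CNT = GENMASK_ULL(40, 32)
 *      FIELD_PREP(MAX_RXC_ICB_CNT, 0xC0) = 0xC0ULL << 32
 *      reg_val = (reg_val & ~GENMASK_ULL(40, 32)) | (0xC0ULL << 32)
 */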
pcifunc, blkid, cgx, lmac, + chan); pci_dev_put(pdev); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 222f9e00b836..82832a24fbd8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2259,14 +2259,13 @@ static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq, schq = smq; for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) { smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl]; + smq_tree_ctx->schq = schq; if (lvl == NIX_TXSCH_LVL_TL1) { - smq_flush_ctx->tl1_schq = schq; smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq); smq_tree_ctx->pir_off = 0; smq_tree_ctx->pir_val = 0; parent_off = 0; } else if (lvl == NIX_TXSCH_LVL_TL2) { - smq_flush_ctx->tl2_schq = schq; smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq); smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq); parent_off = NIX_AF_TL2X_PARENT(schq); @@ -2301,8 +2300,8 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr, { struct nix_txsch *txsch; struct nix_hw *nix_hw; + int tl2, tl2_schq; u64 regoff; - int tl2; nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) @@ -2310,16 +2309,17 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr, /* loop through all TL2s with matching PF_FUNC */ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2]; + tl2_schq = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL2].schq; for (tl2 = 0; tl2 < txsch->schq.max; tl2++) { /* skip the smq(flush) TL2 */ - if (tl2 == smq_flush_ctx->tl2_schq) + if (tl2 == tl2_schq) continue; /* skip unused TL2s */ if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE) continue; /* skip if PF_FUNC doesn't match */ if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) != - (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] & + (TXSCH_MAP_FUNC(txsch->pfvf_map[tl2_schq] & ~RVU_PFVF_FUNC_MASK))) continue; /* enable/disable XOFF */ @@ -2361,10 +2361,12 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, int smq, u16 pcifunc, int nixlf) { struct nix_smq_flush_ctx *smq_flush_ctx; + int err, restore_tx_en = 0, i; int pf = rvu_get_pf(pcifunc); u8 cgx_id = 0, lmac_id = 0; - int err, restore_tx_en = 0; - u64 cfg; + u16 tl2_tl3_link_schq; + u8 link, link_level; + u64 cfg, bmap = 0; if (!is_rvu_otx2(rvu)) { /* Skip SMQ flush if pkt count is zero */ @@ -2388,16 +2390,38 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true); nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false); - cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); - /* Do SMQ flush and set enqueue xoff */ - cfg |= BIT_ULL(50) | BIT_ULL(49); - rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg); - /* Disable backpressure from physical link, * otherwise SMQ flush may stall. */ rvu_cgx_enadis_rx_bp(rvu, pf, false); + link_level = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ? 
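/*
 * The reworked SMQ flush completed below follows a stricter sequence:
 * assert SMQ enqueue xoff (bit 50), clear the transmit-enable credits in
 * NIX_AF_TL3_TL2X_LINKX_CFG (bit 12) for the TL3/TL2 queue feeding the
 * link while recording which ones were set, start the flush and poll bit
 * 49, then restore exactly the links that were recorded. A compressed
 * sketch of the save/restore idiom used:
 *
 *      if (cfg & BIT_ULL(12)) {        save: remember, then disable
 *              bmap |= BIT(i);
 *              cfg &= ~BIT_ULL(12);
 *      }
 *      ...flush, poll...
 *      if (bmap & BIT(i))              restore only what was enabled
 *              cfg |= BIT_ULL(12);
 */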
+ NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2; + tl2_tl3_link_schq = smq_flush_ctx->smq_tree_ctx[link_level].schq; + link = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL1].schq; + + /* SMQ set enqueue xoff */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); + cfg |= BIT_ULL(50); + rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg); + + /* Clear all NIX_AF_TL3_TL2_LINK_CFG[ENA] for the TL3/TL2 queue */ + for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) { + cfg = rvu_read64(rvu, blkaddr, + NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link)); + if (!(cfg & BIT_ULL(12))) + continue; + bmap |= (1 << i); + cfg &= ~BIT_ULL(12); + rvu_write64(rvu, blkaddr, + NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg); + } + + /* Do SMQ flush and set enqueue xoff */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); + cfg |= BIT_ULL(50) | BIT_ULL(49); + rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg); + /* Wait for flush to complete */ err = rvu_poll_reg(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true); @@ -2406,6 +2430,17 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, "NIXLF%d: SMQ%d flush failed, txlink might be busy\n", nixlf, smq); + /* Set NIX_AF_TL3_TL2_LINKX_CFG[ENA] for the TL3/TL2 queue */ + for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) { + if (!(bmap & (1 << i))) + continue; + cfg = rvu_read64(rvu, blkaddr, + NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link)); + cfg |= BIT_ULL(12); + rvu_write64(rvu, blkaddr, + NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg); + } + /* clear XOFF on TL2s */ nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true); nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index d56be5fb7eb4..2b299fa85159 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -545,6 +545,7 @@ #define CPT_AF_CTX_PSH_PC (0x49450ull) #define CPT_AF_CTX_PSH_LATENCY_PC (0x49458ull) #define CPT_AF_CTX_CAM_DATA(a) (0x49800ull | (u64)(a) << 3) +#define CPT_AF_RXC_CFG1 (0x50000ull) #define CPT_AF_RXC_TIME (0x50010ull) #define CPT_AF_RXC_TIME_CFG (0x50018ull) #define CPT_AF_RXC_DFRG (0x50020ull) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 5ef406c7e8a4..fc8da2090657 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -71,13 +71,11 @@ enum cpt_af_int_vec_e { CPT_AF_INT_VEC_CNT = 0x4, }; -enum cpt_10k_af_int_vec_e { +enum cpt_cn10k_flt_int_vec_e { CPT_10K_AF_INT_VEC_FLT0 = 0x0, CPT_10K_AF_INT_VEC_FLT1 = 0x1, CPT_10K_AF_INT_VEC_FLT2 = 0x2, - CPT_10K_AF_INT_VEC_RVU = 0x3, - CPT_10K_AF_INT_VEC_RAS = 0x4, - CPT_10K_AF_INT_VEC_CNT = 0x5, + CPT_10K_AF_INT_VEC_FLT_MAX = 0x3, }; /* NPA Admin function Interrupt Vector Enumeration */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 0db62eb0dab3..32468c663605 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -962,8 +962,6 @@ static int otx2_get_ts_info(struct net_device *netdev, return ethtool_op_get_ts_info(netdev, info); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | 
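/*
 * Same get_ts_info() trim as the ixgbe and mvpp2 hunks earlier in this
 * series: the driver stops advertising SOF_TIMESTAMPING_RX_SOFTWARE and
 * SOF_TIMESTAMPING_SOFTWARE and no longer defaults phc_index, apparently
 * because the ethtool core now supplies the software timestamping modes and
 * the -1 phc_index fallback itself (an assumption from the pattern of these
 * hunks, not stated in this diff). What remains is driver-specific:
 *
 *      info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
 *                              SOF_TIMESTAMPING_TX_HARDWARE |
 *                              SOF_TIMESTAMPING_RX_HARDWARE |
 *                              SOF_TIMESTAMPING_RAW_HARDWARE;
 */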
SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 3eb85949677a..933e18ba2fb2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -687,7 +687,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { __be16 l3_proto = vlan_get_protocol(skb); struct udphdr *udph = udp_hdr(skb); - u16 iplen; + __be16 iplen; ext->lso_sb = skb_transport_offset(skb) + sizeof(struct udphdr); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 63ae01954dfc..22ca6ee9665e 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -633,7 +633,8 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) if (err) goto err_dl_port_register; - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC; + dev->features |= NETIF_F_HW_TC; + dev->netns_local = true; dev->netdev_ops = &prestera_netdev_ops; dev->ethtool_ops = &prestera_ethtool_ops; SET_NETDEV_DEV(dev, sw->dev->dev); diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c index 1c5b85a86df1..930f180688e5 100644 --- a/drivers/net/ethernet/mediatek/airoha_eth.c +++ b/drivers/net/ethernet/mediatek/airoha_eth.c @@ -18,6 +18,7 @@ #include <uapi/linux/ppp_defs.h> #define AIROHA_MAX_NUM_GDM_PORTS 1 +#define AIROHA_MAX_NUM_QDMA 2 #define AIROHA_MAX_NUM_RSTS 3 #define AIROHA_MAX_NUM_XSI_RSTS 5 #define AIROHA_MAX_MTU 2000 @@ -66,9 +67,11 @@ #define FE_RST_GDM3_MBI_ARB_MASK BIT(2) #define FE_RST_CORE_MASK BIT(0) +#define REG_FE_WAN_MAC_H 0x0030 #define REG_FE_LAN_MAC_H 0x0040 -#define REG_FE_LAN_MAC_LMIN 0x0044 -#define REG_FE_LAN_MAC_LMAX 0x0048 + +#define REG_FE_MAC_LMIN(_n) ((_n) + 0x04) +#define REG_FE_MAC_LMAX(_n) ((_n) + 0x08) #define REG_FE_CDM1_OQ_MAP0 0x0050 #define REG_FE_CDM1_OQ_MAP1 0x0054 @@ -727,7 +730,7 @@ struct airoha_queue_entry { }; struct airoha_queue { - struct airoha_eth *eth; + struct airoha_qdma *qdma; /* protect concurrent queue accesses */ spinlock_t lock; @@ -746,7 +749,7 @@ struct airoha_queue { }; struct airoha_tx_irq_queue { - struct airoha_eth *eth; + struct airoha_qdma *qdma; struct napi_struct napi; u32 *q; @@ -782,9 +785,30 @@ struct airoha_hw_stats { u64 rx_len[7]; }; +struct airoha_qdma { + struct airoha_eth *eth; + void __iomem *regs; + + /* protect concurrent irqmask accesses */ + spinlock_t irq_lock; + u32 irqmask[QDMA_INT_REG_MAX]; + int irq; + + struct airoha_tx_irq_queue q_tx_irq[AIROHA_NUM_TX_IRQ]; + + struct airoha_queue q_tx[AIROHA_NUM_TX_RING]; + struct airoha_queue q_rx[AIROHA_NUM_RX_RING]; + + /* descriptor and packet buffers for qdma hw forward */ + struct { + void *desc; + void *q; + } hfwd; +}; + struct airoha_gdm_port { + struct airoha_qdma *qdma; struct net_device *dev; - struct airoha_eth *eth; int id; struct airoha_hw_stats stats; @@ -794,31 +818,15 @@ struct airoha_eth { struct device *dev; unsigned long state; - - void __iomem *qdma_regs; void __iomem *fe_regs; - /* protect concurrent irqmask accesses */ - spinlock_t irq_lock; - u32 irqmask[QDMA_INT_REG_MAX]; - int irq; - struct reset_control_bulk_data rsts[AIROHA_MAX_NUM_RSTS]; struct reset_control_bulk_data 
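/*
 * The airoha_eth.c refactor in this hunk moves all per-controller state out
 * of struct airoha_eth into the new struct airoha_qdma, so that a second
 * QDMA block can be instantiated later (AIROHA_MAX_NUM_QDMA is now 2). The
 * resulting ownership, sketched from the structures above:
 *
 *      struct airoha_eth                shared: fe_regs, resets, napi_dev
 *          qdma[AIROHA_MAX_NUM_QDMA]    per controller:
 *              regs, irq, irqmask[]       MMIO and interrupt bookkeeping
 *              q_tx[], q_rx[], q_tx_irq[] rings
 *              hfwd.desc, hfwd.q          hw-forward buffers
 *          ports[]                      each port keeps a qdma back-pointer
 */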
xsi_rsts[AIROHA_MAX_NUM_XSI_RSTS]; - struct airoha_gdm_port *ports[AIROHA_MAX_NUM_GDM_PORTS]; - struct net_device *napi_dev; - struct airoha_queue q_tx[AIROHA_NUM_TX_RING]; - struct airoha_queue q_rx[AIROHA_NUM_RX_RING]; - - struct airoha_tx_irq_queue q_tx_irq[AIROHA_NUM_TX_IRQ]; - /* descriptor and packet buffers for qdma hw forward */ - struct { - void *desc; - void *q; - } hfwd; + struct airoha_qdma qdma[AIROHA_MAX_NUM_QDMA]; + struct airoha_gdm_port *ports[AIROHA_MAX_NUM_GDM_PORTS]; }; static u32 airoha_rr(void __iomem *base, u32 offset) @@ -850,60 +858,72 @@ static u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val) #define airoha_fe_clear(eth, offset, val) \ airoha_rmw((eth)->fe_regs, (offset), (val), 0) -#define airoha_qdma_rr(eth, offset) \ - airoha_rr((eth)->qdma_regs, (offset)) -#define airoha_qdma_wr(eth, offset, val) \ - airoha_wr((eth)->qdma_regs, (offset), (val)) -#define airoha_qdma_rmw(eth, offset, mask, val) \ - airoha_rmw((eth)->qdma_regs, (offset), (mask), (val)) -#define airoha_qdma_set(eth, offset, val) \ - airoha_rmw((eth)->qdma_regs, (offset), 0, (val)) -#define airoha_qdma_clear(eth, offset, val) \ - airoha_rmw((eth)->qdma_regs, (offset), (val), 0) - -static void airoha_qdma_set_irqmask(struct airoha_eth *eth, int index, +#define airoha_qdma_rr(qdma, offset) \ + airoha_rr((qdma)->regs, (offset)) +#define airoha_qdma_wr(qdma, offset, val) \ + airoha_wr((qdma)->regs, (offset), (val)) +#define airoha_qdma_rmw(qdma, offset, mask, val) \ + airoha_rmw((qdma)->regs, (offset), (mask), (val)) +#define airoha_qdma_set(qdma, offset, val) \ + airoha_rmw((qdma)->regs, (offset), 0, (val)) +#define airoha_qdma_clear(qdma, offset, val) \ + airoha_rmw((qdma)->regs, (offset), (val), 0) + +static void airoha_qdma_set_irqmask(struct airoha_qdma *qdma, int index, u32 clear, u32 set) { unsigned long flags; - if (WARN_ON_ONCE(index >= ARRAY_SIZE(eth->irqmask))) + if (WARN_ON_ONCE(index >= ARRAY_SIZE(qdma->irqmask))) return; - spin_lock_irqsave(&eth->irq_lock, flags); + spin_lock_irqsave(&qdma->irq_lock, flags); - eth->irqmask[index] &= ~clear; - eth->irqmask[index] |= set; - airoha_qdma_wr(eth, REG_INT_ENABLE(index), eth->irqmask[index]); + qdma->irqmask[index] &= ~clear; + qdma->irqmask[index] |= set; + airoha_qdma_wr(qdma, REG_INT_ENABLE(index), qdma->irqmask[index]); /* Read irq_enable register in order to guarantee the update above * completes in the spinlock critical section. */ - airoha_qdma_rr(eth, REG_INT_ENABLE(index)); + airoha_qdma_rr(qdma, REG_INT_ENABLE(index)); - spin_unlock_irqrestore(&eth->irq_lock, flags); + spin_unlock_irqrestore(&qdma->irq_lock, flags); } -static void airoha_qdma_irq_enable(struct airoha_eth *eth, int index, +static void airoha_qdma_irq_enable(struct airoha_qdma *qdma, int index, u32 mask) { - airoha_qdma_set_irqmask(eth, index, 0, mask); + airoha_qdma_set_irqmask(qdma, index, 0, mask); } -static void airoha_qdma_irq_disable(struct airoha_eth *eth, int index, +static void airoha_qdma_irq_disable(struct airoha_qdma *qdma, int index, u32 mask) { - airoha_qdma_set_irqmask(eth, index, mask, 0); + airoha_qdma_set_irqmask(qdma, index, mask, 0); } -static void airoha_set_macaddr(struct airoha_eth *eth, const u8 *addr) +static bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) { - u32 val; + /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. + * GDM{2,3,4} can be used as wan port connected to an external + * phy module. 
+ */ + return port->id == 1; +} +static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr) +{ + struct airoha_eth *eth = port->qdma->eth; + u32 val, reg; + + reg = airhoa_is_lan_gdm_port(port) ? REG_FE_LAN_MAC_H : REG_FE_WAN_MAC_H; val = (addr[0] << 16) | (addr[1] << 8) | addr[2]; - airoha_fe_wr(eth, REG_FE_LAN_MAC_H, val); + airoha_fe_wr(eth, reg, val); val = (addr[3] << 16) | (addr[4] << 8) | addr[5]; - airoha_fe_wr(eth, REG_FE_LAN_MAC_LMIN, val); - airoha_fe_wr(eth, REG_FE_LAN_MAC_LMAX, val); + airoha_fe_wr(eth, REG_FE_MAC_LMIN(reg), val); + airoha_fe_wr(eth, REG_FE_MAC_LMAX(reg), val); } static void airoha_set_gdm_port_fwd_cfg(struct airoha_eth *eth, u32 addr, @@ -1383,8 +1403,9 @@ static int airoha_fe_init(struct airoha_eth *eth) static int airoha_qdma_fill_rx_queue(struct airoha_queue *q) { enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool); - struct airoha_eth *eth = q->eth; - int qid = q - &eth->q_rx[0]; + struct airoha_qdma *qdma = q->qdma; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0]; int nframes = 0; while (q->queued < q->ndesc - 1) { @@ -1420,7 +1441,8 @@ static int airoha_qdma_fill_rx_queue(struct airoha_queue *q) WRITE_ONCE(desc->msg2, 0); WRITE_ONCE(desc->msg3, 0); - airoha_qdma_rmw(eth, REG_RX_CPU_IDX(qid), RX_RING_CPU_IDX_MASK, + airoha_qdma_rmw(qdma, REG_RX_CPU_IDX(qid), + RX_RING_CPU_IDX_MASK, FIELD_PREP(RX_RING_CPU_IDX_MASK, q->head)); } @@ -1450,8 +1472,9 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth, static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) { enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool); - struct airoha_eth *eth = q->eth; - int qid = q - &eth->q_rx[0]; + struct airoha_qdma *qdma = q->qdma; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0]; int done = 0; while (done < budget) { @@ -1513,7 +1536,6 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget) { struct airoha_queue *q = container_of(napi, struct airoha_queue, napi); - struct airoha_eth *eth = q->eth; int cur, done = 0; do { @@ -1522,14 +1544,14 @@ static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget) } while (cur && done < budget); if (done < budget && napi_complete(napi)) - airoha_qdma_irq_enable(eth, QDMA_INT_REG_IDX1, + airoha_qdma_irq_enable(q->qdma, QDMA_INT_REG_IDX1, RX_DONE_INT_MASK); return done; } -static int airoha_qdma_init_rx_queue(struct airoha_eth *eth, - struct airoha_queue *q, int ndesc) +static int airoha_qdma_init_rx_queue(struct airoha_queue *q, + struct airoha_qdma *qdma, int ndesc) { const struct page_pool_params pp_params = { .order = 0, @@ -1538,15 +1560,16 @@ static int airoha_qdma_init_rx_queue(struct airoha_eth *eth, .dma_dir = DMA_FROM_DEVICE, .max_len = PAGE_SIZE, .nid = NUMA_NO_NODE, - .dev = eth->dev, + .dev = qdma->eth->dev, .napi = &q->napi, }; - int qid = q - &eth->q_rx[0], thr; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0], thr; dma_addr_t dma_addr; q->buf_size = PAGE_SIZE / 2; q->ndesc = ndesc; - q->eth = eth; + q->qdma = qdma; q->entry = devm_kzalloc(eth->dev, q->ndesc * sizeof(*q->entry), GFP_KERNEL); @@ -1568,14 +1591,15 @@ static int airoha_qdma_init_rx_queue(struct airoha_eth *eth, netif_napi_add(eth->napi_dev, &q->napi, airoha_qdma_rx_napi_poll); - airoha_qdma_wr(eth, REG_RX_RING_BASE(qid), dma_addr); - airoha_qdma_rmw(eth, REG_RX_RING_SIZE(qid), RX_RING_SIZE_MASK, + airoha_qdma_wr(qdma, 
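/*
 * Queue helpers now recover both their ring index and their owning
 * controller from the queue pointer alone, so every register access lands
 * on the right QDMA instance. The idiom, as used throughout this hunk:
 *
 *      int qid = q - &qdma->q_rx[0];            pointer arithmetic -> index
 *      struct airoha_eth *eth = q->qdma->eth;   back-pointer -> shared state
 *      airoha_qdma_wr(qdma, REG_RX_RING_BASE(qid), dma_addr);
 */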
@@ -1383,8 +1403,9 @@ static int airoha_fe_init(struct airoha_eth *eth) static int airoha_qdma_fill_rx_queue(struct airoha_queue *q) { enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool); - struct airoha_eth *eth = q->eth; - int qid = q - &eth->q_rx[0]; + struct airoha_qdma *qdma = q->qdma; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0]; int nframes = 0; while (q->queued < q->ndesc - 1) { @@ -1420,7 +1441,8 @@ static int airoha_qdma_fill_rx_queue(struct airoha_queue *q) WRITE_ONCE(desc->msg2, 0); WRITE_ONCE(desc->msg3, 0); - airoha_qdma_rmw(eth, REG_RX_CPU_IDX(qid), RX_RING_CPU_IDX_MASK, + airoha_qdma_rmw(qdma, REG_RX_CPU_IDX(qid), + RX_RING_CPU_IDX_MASK, FIELD_PREP(RX_RING_CPU_IDX_MASK, q->head)); } @@ -1450,8 +1472,9 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth, static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) { enum dma_data_direction dir = page_pool_get_dma_dir(q->page_pool); - struct airoha_eth *eth = q->eth; - int qid = q - &eth->q_rx[0]; + struct airoha_qdma *qdma = q->qdma; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0]; int done = 0; while (done < budget) { @@ -1513,7 +1536,6 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget) { struct airoha_queue *q = container_of(napi, struct airoha_queue, napi); - struct airoha_eth *eth = q->eth; int cur, done = 0; do { @@ -1522,14 +1544,14 @@ static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget) } while (cur && done < budget); if (done < budget && napi_complete(napi)) - airoha_qdma_irq_enable(eth, QDMA_INT_REG_IDX1, + airoha_qdma_irq_enable(q->qdma, QDMA_INT_REG_IDX1, RX_DONE_INT_MASK); return done; } -static int airoha_qdma_init_rx_queue(struct airoha_eth *eth, - struct airoha_queue *q, int ndesc) +static int airoha_qdma_init_rx_queue(struct airoha_queue *q, + struct airoha_qdma *qdma, int ndesc) { const struct page_pool_params pp_params = { .order = 0, @@ -1538,15 +1560,16 @@ static int airoha_qdma_init_rx_queue(struct airoha_eth *eth, .dma_dir = DMA_FROM_DEVICE, .max_len = PAGE_SIZE, .nid = NUMA_NO_NODE, - .dev = eth->dev, + .dev = qdma->eth->dev, .napi = &q->napi, }; - int qid = q - &eth->q_rx[0], thr; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0], thr; dma_addr_t dma_addr; q->buf_size = PAGE_SIZE / 2; q->ndesc = ndesc; - q->eth = eth; + q->qdma = qdma; q->entry = devm_kzalloc(eth->dev, q->ndesc * sizeof(*q->entry), GFP_KERNEL); @@ -1568,14 +1591,15 @@ static int airoha_qdma_init_rx_queue(struct airoha_eth *eth, netif_napi_add(eth->napi_dev, &q->napi, airoha_qdma_rx_napi_poll); - airoha_qdma_wr(eth, REG_RX_RING_BASE(qid), dma_addr); - airoha_qdma_rmw(eth, REG_RX_RING_SIZE(qid), RX_RING_SIZE_MASK, + airoha_qdma_wr(qdma, REG_RX_RING_BASE(qid), dma_addr); + airoha_qdma_rmw(qdma, REG_RX_RING_SIZE(qid), + RX_RING_SIZE_MASK, FIELD_PREP(RX_RING_SIZE_MASK, ndesc)); thr = clamp(ndesc >> 3, 1, 32); - airoha_qdma_rmw(eth, REG_RX_RING_SIZE(qid), RX_RING_THR_MASK, + airoha_qdma_rmw(qdma, REG_RX_RING_SIZE(qid), RX_RING_THR_MASK, FIELD_PREP(RX_RING_THR_MASK, thr)); - airoha_qdma_rmw(eth, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK, + airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK, FIELD_PREP(RX_RING_DMA_IDX_MASK, q->head)); airoha_qdma_fill_rx_queue(q); @@ -1585,7 +1609,7 @@ static int airoha_qdma_init_rx_queue(struct airoha_eth *eth, static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q) { - struct airoha_eth *eth = q->eth; + struct airoha_eth *eth = q->qdma->eth; while (q->queued) { struct airoha_queue_entry *e = &q->entry[q->tail]; @@ -1599,11 +1623,11 @@ static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q) } } -static int airoha_qdma_init_rx(struct airoha_eth *eth) +static int airoha_qdma_init_rx(struct airoha_qdma *qdma) { int i; - for (i = 0; i < ARRAY_SIZE(eth->q_rx); i++) { + for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { int err; if (!(RX_DONE_INT_MASK & BIT(i))) { @@ -1611,7 +1635,7 @@ static int airoha_qdma_init_rx(struct airoha_eth *eth) continue; } - err = airoha_qdma_init_rx_queue(eth, &eth->q_rx[i], + err = airoha_qdma_init_rx_queue(&qdma->q_rx[i], qdma, RX_DSCP_NUM(i)); if (err) return err; @@ -1623,12 +1647,14 @@ static int airoha_qdma_init_rx(struct airoha_eth *eth) static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) { struct airoha_tx_irq_queue *irq_q; + struct airoha_qdma *qdma; struct airoha_eth *eth; int id, done = 0; irq_q = container_of(napi, struct airoha_tx_irq_queue, napi); - eth = irq_q->eth; - id = irq_q - &eth->q_tx_irq[0]; + qdma = irq_q->qdma; + id = irq_q - &qdma->q_tx_irq[0]; + eth = qdma->eth; while (irq_q->queued > 0 && done < budget) { u32 qid, last, val = irq_q->q[irq_q->head]; @@ -1645,10 +1671,10 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) last = FIELD_GET(IRQ_DESC_IDX_MASK, val); qid = FIELD_GET(IRQ_RING_IDX_MASK, val); - if (qid >= ARRAY_SIZE(eth->q_tx)) + if (qid >= ARRAY_SIZE(qdma->q_tx)) continue; - q = &eth->q_tx[qid]; + q = &qdma->q_tx[qid]; if (!q->ndesc) continue; @@ -1697,28 +1723,29 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) int i, len = done >> 7; for (i = 0; i < len; i++) - airoha_qdma_rmw(eth, REG_IRQ_CLEAR_LEN(id), + airoha_qdma_rmw(qdma, REG_IRQ_CLEAR_LEN(id), IRQ_CLEAR_LEN_MASK, 0x80); - airoha_qdma_rmw(eth, REG_IRQ_CLEAR_LEN(id), + airoha_qdma_rmw(qdma, REG_IRQ_CLEAR_LEN(id), IRQ_CLEAR_LEN_MASK, (done & 0x7f)); } if (done < budget && napi_complete(napi)) - airoha_qdma_irq_enable(eth, QDMA_INT_REG_IDX0, + airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX0, TX_DONE_INT_MASK(id)); return done; } -static int airoha_qdma_init_tx_queue(struct airoha_eth *eth, - struct airoha_queue *q, int size) +static int airoha_qdma_init_tx_queue(struct airoha_queue *q, + struct airoha_qdma *qdma, int size) { - int i, qid = q - &eth->q_tx[0]; + struct airoha_eth *eth = qdma->eth; + int i, qid = q - &qdma->q_tx[0]; dma_addr_t dma_addr; spin_lock_init(&q->lock); q->ndesc = size; - q->eth = eth; + q->qdma = qdma; q->free_thr = 1 + MAX_SKB_FRAGS; q->entry = devm_kzalloc(eth->dev, q->ndesc * sizeof(*q->entry),
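A recurring change in these hunks is that a queue no longer stores its own index; it is recomputed by pointer arithmetic against the embedding array, e.g. qid = q - &qdma->q_rx[0]. The idiom sketched on a generic structure (ring and ctrl are illustrative types, not driver ones):

struct ring { int ndesc; };
struct ctrl { struct ring rx[16]; };

static int ring_index(struct ctrl *c, struct ring *r)
{
	/* Valid only while r points into c->rx[]; this mirrors
	 * "qid = q - &qdma->q_rx[0]" in the hunks above.
	 */
	return r - &c->rx[0];
}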
@@ -1738,20 +1765,20 @@ static int airoha_qdma_init_tx_queue(struct airoha_eth *eth, WRITE_ONCE(q->desc[i].ctrl, cpu_to_le32(val)); } - airoha_qdma_wr(eth, REG_TX_RING_BASE(qid), dma_addr); - airoha_qdma_rmw(eth, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, + airoha_qdma_wr(qdma, REG_TX_RING_BASE(qid), dma_addr); + airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, FIELD_PREP(TX_RING_CPU_IDX_MASK, q->head)); - airoha_qdma_rmw(eth, REG_TX_DMA_IDX(qid), TX_RING_DMA_IDX_MASK, + airoha_qdma_rmw(qdma, REG_TX_DMA_IDX(qid), TX_RING_DMA_IDX_MASK, FIELD_PREP(TX_RING_DMA_IDX_MASK, q->head)); return 0; } -static int airoha_qdma_tx_irq_init(struct airoha_eth *eth, - struct airoha_tx_irq_queue *irq_q, - int size) +static int airoha_qdma_tx_irq_init(struct airoha_tx_irq_queue *irq_q, + struct airoha_qdma *qdma, int size) { - int id = irq_q - &eth->q_tx_irq[0]; + int id = irq_q - &qdma->q_tx_irq[0]; + struct airoha_eth *eth = qdma->eth; dma_addr_t dma_addr; netif_napi_add_tx(eth->napi_dev, &irq_q->napi, @@ -1763,30 +1790,30 @@ static int airoha_qdma_tx_irq_init(struct airoha_eth *eth, memset(irq_q->q, 0xff, size * sizeof(u32)); irq_q->size = size; - irq_q->eth = eth; + irq_q->qdma = qdma; - airoha_qdma_wr(eth, REG_TX_IRQ_BASE(id), dma_addr); - airoha_qdma_rmw(eth, REG_TX_IRQ_CFG(id), TX_IRQ_DEPTH_MASK, + airoha_qdma_wr(qdma, REG_TX_IRQ_BASE(id), dma_addr); + airoha_qdma_rmw(qdma, REG_TX_IRQ_CFG(id), TX_IRQ_DEPTH_MASK, FIELD_PREP(TX_IRQ_DEPTH_MASK, size)); - airoha_qdma_rmw(eth, REG_TX_IRQ_CFG(id), TX_IRQ_THR_MASK, + airoha_qdma_rmw(qdma, REG_TX_IRQ_CFG(id), TX_IRQ_THR_MASK, FIELD_PREP(TX_IRQ_THR_MASK, 1)); return 0; } -static int airoha_qdma_init_tx(struct airoha_eth *eth) +static int airoha_qdma_init_tx(struct airoha_qdma *qdma) { int i, err; - for (i = 0; i < ARRAY_SIZE(eth->q_tx_irq); i++) { - err = airoha_qdma_tx_irq_init(eth, &eth->q_tx_irq[i], + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) { + err = airoha_qdma_tx_irq_init(&qdma->q_tx_irq[i], qdma, IRQ_QUEUE_LEN(i)); if (err) return err; } - for (i = 0; i < ARRAY_SIZE(eth->q_tx); i++) { - err = airoha_qdma_init_tx_queue(eth, &eth->q_tx[i], + for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { + err = airoha_qdma_init_tx_queue(&qdma->q_tx[i], qdma, TX_DSCP_NUM); if (err) return err; @@ -1797,7 +1824,7 @@ static int airoha_qdma_init_tx(struct airoha_eth *eth) static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) { - struct airoha_eth *eth = q->eth; + struct airoha_eth *eth = q->qdma->eth; spin_lock_bh(&q->lock); while (q->queued) { @@ -1814,34 +1841,35 @@ static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) spin_unlock_bh(&q->lock); } -static int airoha_qdma_init_hfwd_queues(struct airoha_eth *eth) +static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) { + struct airoha_eth *eth = qdma->eth; dma_addr_t dma_addr; u32 status; int size; size = HW_DSCP_NUM * sizeof(struct airoha_qdma_fwd_desc); - eth->hfwd.desc = dmam_alloc_coherent(eth->dev, size, &dma_addr, - GFP_KERNEL); - if (!eth->hfwd.desc) + qdma->hfwd.desc = dmam_alloc_coherent(eth->dev, size, &dma_addr, + GFP_KERNEL); + if (!qdma->hfwd.desc) return -ENOMEM; - airoha_qdma_wr(eth, REG_FWD_DSCP_BASE, dma_addr); + airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); size = AIROHA_MAX_PACKET_SIZE * HW_DSCP_NUM; - eth->hfwd.q = dmam_alloc_coherent(eth->dev, size, &dma_addr, - GFP_KERNEL); - if (!eth->hfwd.q) + qdma->hfwd.q = dmam_alloc_coherent(eth->dev, size, &dma_addr, + GFP_KERNEL); + if (!qdma->hfwd.q) return -ENOMEM; - airoha_qdma_wr(eth, REG_FWD_BUF_BASE, dma_addr); + airoha_qdma_wr(qdma, REG_FWD_BUF_BASE, dma_addr); - airoha_qdma_rmw(eth, REG_HW_FWD_DSCP_CFG, 
HW_FWD_DSCP_PAYLOAD_SIZE_MASK, FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, 0)); - airoha_qdma_rmw(eth, REG_FWD_DSCP_LOW_THR, FWD_DSCP_LOW_THR_MASK, + airoha_qdma_rmw(qdma, REG_FWD_DSCP_LOW_THR, FWD_DSCP_LOW_THR_MASK, FIELD_PREP(FWD_DSCP_LOW_THR_MASK, 128)); - airoha_qdma_rmw(eth, REG_LMGR_INIT_CFG, + airoha_qdma_rmw(qdma, REG_LMGR_INIT_CFG, LMGR_INIT_START | LMGR_SRAM_MODE_MASK | HW_FWD_DESC_NUM_MASK, FIELD_PREP(HW_FWD_DESC_NUM_MASK, HW_DSCP_NUM) | @@ -1849,87 +1877,87 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_eth *eth) return read_poll_timeout(airoha_qdma_rr, status, !(status & LMGR_INIT_START), USEC_PER_MSEC, - 30 * USEC_PER_MSEC, true, eth, + 30 * USEC_PER_MSEC, true, qdma, REG_LMGR_INIT_CFG); } -static void airoha_qdma_init_qos(struct airoha_eth *eth) +static void airoha_qdma_init_qos(struct airoha_qdma *qdma) { - airoha_qdma_clear(eth, REG_TXWRR_MODE_CFG, TWRR_WEIGHT_SCALE_MASK); - airoha_qdma_set(eth, REG_TXWRR_MODE_CFG, TWRR_WEIGHT_BASE_MASK); + airoha_qdma_clear(qdma, REG_TXWRR_MODE_CFG, TWRR_WEIGHT_SCALE_MASK); + airoha_qdma_set(qdma, REG_TXWRR_MODE_CFG, TWRR_WEIGHT_BASE_MASK); - airoha_qdma_clear(eth, REG_PSE_BUF_USAGE_CFG, + airoha_qdma_clear(qdma, REG_PSE_BUF_USAGE_CFG, PSE_BUF_ESTIMATE_EN_MASK); - airoha_qdma_set(eth, REG_EGRESS_RATE_METER_CFG, + airoha_qdma_set(qdma, REG_EGRESS_RATE_METER_CFG, EGRESS_RATE_METER_EN_MASK | EGRESS_RATE_METER_EQ_RATE_EN_MASK); /* 2047us x 31 = 63.457ms */ - airoha_qdma_rmw(eth, REG_EGRESS_RATE_METER_CFG, + airoha_qdma_rmw(qdma, REG_EGRESS_RATE_METER_CFG, EGRESS_RATE_METER_WINDOW_SZ_MASK, FIELD_PREP(EGRESS_RATE_METER_WINDOW_SZ_MASK, 0x1f)); - airoha_qdma_rmw(eth, REG_EGRESS_RATE_METER_CFG, + airoha_qdma_rmw(qdma, REG_EGRESS_RATE_METER_CFG, EGRESS_RATE_METER_TIMESLICE_MASK, FIELD_PREP(EGRESS_RATE_METER_TIMESLICE_MASK, 0x7ff)); /* ratelimit init */ - airoha_qdma_set(eth, REG_GLB_TRTCM_CFG, GLB_TRTCM_EN_MASK); + airoha_qdma_set(qdma, REG_GLB_TRTCM_CFG, GLB_TRTCM_EN_MASK); /* fast-tick 25us */ - airoha_qdma_rmw(eth, REG_GLB_TRTCM_CFG, GLB_FAST_TICK_MASK, + airoha_qdma_rmw(qdma, REG_GLB_TRTCM_CFG, GLB_FAST_TICK_MASK, FIELD_PREP(GLB_FAST_TICK_MASK, 25)); - airoha_qdma_rmw(eth, REG_GLB_TRTCM_CFG, GLB_SLOW_TICK_RATIO_MASK, + airoha_qdma_rmw(qdma, REG_GLB_TRTCM_CFG, GLB_SLOW_TICK_RATIO_MASK, FIELD_PREP(GLB_SLOW_TICK_RATIO_MASK, 40)); - airoha_qdma_set(eth, REG_EGRESS_TRTCM_CFG, EGRESS_TRTCM_EN_MASK); - airoha_qdma_rmw(eth, REG_EGRESS_TRTCM_CFG, EGRESS_FAST_TICK_MASK, + airoha_qdma_set(qdma, REG_EGRESS_TRTCM_CFG, EGRESS_TRTCM_EN_MASK); + airoha_qdma_rmw(qdma, REG_EGRESS_TRTCM_CFG, EGRESS_FAST_TICK_MASK, FIELD_PREP(EGRESS_FAST_TICK_MASK, 25)); - airoha_qdma_rmw(eth, REG_EGRESS_TRTCM_CFG, + airoha_qdma_rmw(qdma, REG_EGRESS_TRTCM_CFG, EGRESS_SLOW_TICK_RATIO_MASK, FIELD_PREP(EGRESS_SLOW_TICK_RATIO_MASK, 40)); - airoha_qdma_set(eth, REG_INGRESS_TRTCM_CFG, INGRESS_TRTCM_EN_MASK); - airoha_qdma_clear(eth, REG_INGRESS_TRTCM_CFG, + airoha_qdma_set(qdma, REG_INGRESS_TRTCM_CFG, INGRESS_TRTCM_EN_MASK); + airoha_qdma_clear(qdma, REG_INGRESS_TRTCM_CFG, INGRESS_TRTCM_MODE_MASK); - airoha_qdma_rmw(eth, REG_INGRESS_TRTCM_CFG, INGRESS_FAST_TICK_MASK, + airoha_qdma_rmw(qdma, REG_INGRESS_TRTCM_CFG, INGRESS_FAST_TICK_MASK, FIELD_PREP(INGRESS_FAST_TICK_MASK, 125)); - airoha_qdma_rmw(eth, REG_INGRESS_TRTCM_CFG, + airoha_qdma_rmw(qdma, REG_INGRESS_TRTCM_CFG, INGRESS_SLOW_TICK_RATIO_MASK, FIELD_PREP(INGRESS_SLOW_TICK_RATIO_MASK, 8)); - airoha_qdma_set(eth, REG_SLA_TRTCM_CFG, SLA_TRTCM_EN_MASK); - airoha_qdma_rmw(eth, REG_SLA_TRTCM_CFG, SLA_FAST_TICK_MASK, + 
airoha_qdma_set(qdma, REG_SLA_TRTCM_CFG, SLA_TRTCM_EN_MASK); + airoha_qdma_rmw(qdma, REG_SLA_TRTCM_CFG, SLA_FAST_TICK_MASK, FIELD_PREP(SLA_FAST_TICK_MASK, 25)); - airoha_qdma_rmw(eth, REG_SLA_TRTCM_CFG, SLA_SLOW_TICK_RATIO_MASK, + airoha_qdma_rmw(qdma, REG_SLA_TRTCM_CFG, SLA_SLOW_TICK_RATIO_MASK, FIELD_PREP(SLA_SLOW_TICK_RATIO_MASK, 40)); } -static int airoha_qdma_hw_init(struct airoha_eth *eth) +static int airoha_qdma_hw_init(struct airoha_qdma *qdma) { int i; /* clear pending irqs */ - for (i = 0; i < ARRAY_SIZE(eth->irqmask); i++) - airoha_qdma_wr(eth, REG_INT_STATUS(i), 0xffffffff); + for (i = 0; i < ARRAY_SIZE(qdma->irqmask); i++) + airoha_qdma_wr(qdma, REG_INT_STATUS(i), 0xffffffff); /* setup irqs */ - airoha_qdma_irq_enable(eth, QDMA_INT_REG_IDX0, INT_IDX0_MASK); - airoha_qdma_irq_enable(eth, QDMA_INT_REG_IDX1, INT_IDX1_MASK); - airoha_qdma_irq_enable(eth, QDMA_INT_REG_IDX4, INT_IDX4_MASK); + airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX0, INT_IDX0_MASK); + airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX1, INT_IDX1_MASK); + airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX4, INT_IDX4_MASK); /* setup irq binding */ - for (i = 0; i < ARRAY_SIZE(eth->q_tx); i++) { - if (!eth->q_tx[i].ndesc) + for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { + if (!qdma->q_tx[i].ndesc) continue; if (TX_RING_IRQ_BLOCKING_MAP_MASK & BIT(i)) - airoha_qdma_set(eth, REG_TX_RING_BLOCKING(i), + airoha_qdma_set(qdma, REG_TX_RING_BLOCKING(i), TX_RING_IRQ_BLOCKING_CFG_MASK); else - airoha_qdma_clear(eth, REG_TX_RING_BLOCKING(i), + airoha_qdma_clear(qdma, REG_TX_RING_BLOCKING(i), TX_RING_IRQ_BLOCKING_CFG_MASK); } - airoha_qdma_wr(eth, REG_QDMA_GLOBAL_CFG, + airoha_qdma_wr(qdma, REG_QDMA_GLOBAL_CFG, GLOBAL_CFG_RX_2B_OFFSET_MASK | FIELD_PREP(GLOBAL_CFG_DMA_PREFERENCE_MASK, 3) | GLOBAL_CFG_CPU_TXR_RR_MASK | @@ -1940,18 +1968,18 @@ static int airoha_qdma_hw_init(struct airoha_eth *eth) GLOBAL_CFG_TX_WB_DONE_MASK | FIELD_PREP(GLOBAL_CFG_MAX_ISSUE_NUM_MASK, 2)); - airoha_qdma_init_qos(eth); + airoha_qdma_init_qos(qdma); /* disable qdma rx delay interrupt */ - for (i = 0; i < ARRAY_SIZE(eth->q_rx); i++) { - if (!eth->q_rx[i].ndesc) + for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) continue; - airoha_qdma_clear(eth, REG_RX_DELAY_INT_IDX(i), + airoha_qdma_clear(qdma, REG_RX_DELAY_INT_IDX(i), RX_DELAY_INT_MASK); } - airoha_qdma_set(eth, REG_TXQ_CNGST_CFG, + airoha_qdma_set(qdma, REG_TXQ_CNGST_CFG, TXQ_CNGST_DROP_EN | TXQ_CNGST_DEI_DROP_EN); return 0;
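The interrupt handler in the next hunk reads each status register, masks the value against the cached irqmask, and writes the masked value straight back: assuming write-1-to-clear semantics for the status bits (which the pattern implies), only the serviced sources are acked. A condensed sketch under that assumption, with illustrative names:

static irqreturn_t w1c_irq_sketch(void __iomem *status_reg, u32 enabled)
{
	u32 pending = readl(status_reg) & enabled;

	if (!pending)
		return IRQ_NONE;

	writel(pending, status_reg);	/* W1C: ack only the handled bits */
	/* ... napi_schedule() the queues flagged in "pending" ... */
	return IRQ_HANDLED;
}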
@@ -1959,150 +1987,180 @@ static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) { - struct airoha_eth *eth = dev_instance; - u32 intr[ARRAY_SIZE(eth->irqmask)]; + struct airoha_qdma *qdma = dev_instance; + u32 intr[ARRAY_SIZE(qdma->irqmask)]; int i; - for (i = 0; i < ARRAY_SIZE(eth->irqmask); i++) { - intr[i] = airoha_qdma_rr(eth, REG_INT_STATUS(i)); - intr[i] &= eth->irqmask[i]; - airoha_qdma_wr(eth, REG_INT_STATUS(i), intr[i]); + for (i = 0; i < ARRAY_SIZE(qdma->irqmask); i++) { + intr[i] = airoha_qdma_rr(qdma, REG_INT_STATUS(i)); + intr[i] &= qdma->irqmask[i]; + airoha_qdma_wr(qdma, REG_INT_STATUS(i), intr[i]); } - if (!test_bit(DEV_STATE_INITIALIZED, &eth->state)) + if (!test_bit(DEV_STATE_INITIALIZED, &qdma->eth->state)) return IRQ_NONE; if (intr[1] & RX_DONE_INT_MASK) { - airoha_qdma_irq_disable(eth, QDMA_INT_REG_IDX1, + airoha_qdma_irq_disable(qdma, QDMA_INT_REG_IDX1, RX_DONE_INT_MASK); - for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) continue; if (intr[1] & BIT(i)) - napi_schedule(&eth->q_rx[i].napi); + napi_schedule(&qdma->q_rx[i].napi); } } if (intr[0] & INT_TX_MASK) { - for (i = 0; i < ARRAY_SIZE(eth->q_tx_irq); i++) { - struct airoha_tx_irq_queue *irq_q = &eth->q_tx_irq[i]; + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) { + struct airoha_tx_irq_queue *irq_q = &qdma->q_tx_irq[i]; u32 status, head; if (!(intr[0] & TX_DONE_INT_MASK(i))) continue; - airoha_qdma_irq_disable(eth, QDMA_INT_REG_IDX0, + airoha_qdma_irq_disable(qdma, QDMA_INT_REG_IDX0, TX_DONE_INT_MASK(i)); - status = airoha_qdma_rr(eth, REG_IRQ_STATUS(i)); + status = airoha_qdma_rr(qdma, REG_IRQ_STATUS(i)); head = FIELD_GET(IRQ_HEAD_IDX_MASK, status); irq_q->head = head % irq_q->size; irq_q->queued = FIELD_GET(IRQ_ENTRY_LEN_MASK, status); - napi_schedule(&eth->q_tx_irq[i].napi); + napi_schedule(&qdma->q_tx_irq[i].napi); } } return IRQ_HANDLED; } -static int airoha_qdma_init(struct airoha_eth *eth) +static int airoha_qdma_init(struct platform_device *pdev, + struct airoha_eth *eth, + struct airoha_qdma *qdma) { - int err; + int err, id = qdma - &eth->qdma[0]; + const char *res; - err = devm_request_irq(eth->dev, eth->irq, airoha_irq_handler, - IRQF_SHARED, KBUILD_MODNAME, eth); - if (err) - return err; + spin_lock_init(&qdma->irq_lock); + qdma->eth = eth; - err = airoha_qdma_init_rx(eth); + res = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d", id); + if (!res) + return -ENOMEM; + + qdma->regs = devm_platform_ioremap_resource_byname(pdev, res); + if (IS_ERR(qdma->regs)) + return dev_err_probe(eth->dev, PTR_ERR(qdma->regs), + "failed to iomap qdma%d regs\n", id); + + qdma->irq = platform_get_irq(pdev, 4 * id); + if (qdma->irq < 0) + return qdma->irq; + + err = devm_request_irq(eth->dev, qdma->irq, airoha_irq_handler, + IRQF_SHARED, KBUILD_MODNAME, qdma); if (err) return err; - err = airoha_qdma_init_tx(eth); + err = airoha_qdma_init_rx(qdma); if (err) return err; - err = airoha_qdma_init_hfwd_queues(eth); + err = airoha_qdma_init_tx(qdma); if (err) return err; - err = airoha_qdma_hw_init(eth); + err = airoha_qdma_init_hfwd_queues(qdma); if (err) return err; - set_bit(DEV_STATE_INITIALIZED, &eth->state); - - return 0; + return airoha_qdma_hw_init(qdma); } -static int airoha_hw_init(struct airoha_eth *eth) +static int airoha_hw_init(struct platform_device *pdev, + struct airoha_eth *eth) { - int err; + int err, i; /* disable xsi */ - reset_control_bulk_assert(ARRAY_SIZE(eth->xsi_rsts), eth->xsi_rsts); + err = reset_control_bulk_assert(ARRAY_SIZE(eth->xsi_rsts), + eth->xsi_rsts); + if (err) + return err; + + err = reset_control_bulk_assert(ARRAY_SIZE(eth->rsts), eth->rsts); + if (err) + return err; - reset_control_bulk_assert(ARRAY_SIZE(eth->rsts), eth->rsts); - msleep(20); - reset_control_bulk_deassert(ARRAY_SIZE(eth->rsts), eth->rsts); msleep(20); + err = reset_control_bulk_deassert(ARRAY_SIZE(eth->rsts), eth->rsts); + if (err) + return err; + msleep(20); err = airoha_fe_init(eth); if (err) return err; - return airoha_qdma_init(eth); + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) { + err = airoha_qdma_init(pdev, eth, &eth->qdma[i]); + if (err) + return err; + } + + set_bit(DEV_STATE_INITIALIZED, &eth->state); + + return 0; }
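airoha_qdma_init() above derives everything for one controller instance from its id: the register block is looked up by the resource name "qdma%d" and the interrupt by index 4 * id, i.e. the binding is assumed to reserve four interrupt lines per QDMA block. A sketch of that per-instance lookup under those assumptions (qdma_map_instance is illustrative, not the driver's function):

static int qdma_map_instance(struct platform_device *pdev,
			     struct airoha_qdma *qdma, int id)
{
	char name[8];

	snprintf(name, sizeof(name), "qdma%d", id);
	qdma->regs = devm_platform_ioremap_resource_byname(pdev, name);
	if (IS_ERR(qdma->regs))
		return PTR_ERR(qdma->regs);

	qdma->irq = platform_get_irq(pdev, 4 * id);	/* 4 lines per block */
	return qdma->irq < 0 ? qdma->irq : 0;
}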
-static void airoha_hw_cleanup(struct airoha_eth *eth) +static void airoha_hw_cleanup(struct airoha_qdma *qdma) { int i; - for (i = 0; i < ARRAY_SIZE(eth->q_rx); i++) { - if (!eth->q_rx[i].ndesc) + for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) continue; - napi_disable(&eth->q_rx[i].napi); - netif_napi_del(&eth->q_rx[i].napi); - airoha_qdma_cleanup_rx_queue(&eth->q_rx[i]); - if (eth->q_rx[i].page_pool) - page_pool_destroy(eth->q_rx[i].page_pool); + napi_disable(&qdma->q_rx[i].napi); + netif_napi_del(&qdma->q_rx[i].napi); + airoha_qdma_cleanup_rx_queue(&qdma->q_rx[i]); + if (qdma->q_rx[i].page_pool) + page_pool_destroy(qdma->q_rx[i].page_pool); } - for (i = 0; i < ARRAY_SIZE(eth->q_tx_irq); i++) { - napi_disable(&eth->q_tx_irq[i].napi); - netif_napi_del(&eth->q_tx_irq[i].napi); + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) { + napi_disable(&qdma->q_tx_irq[i].napi); + netif_napi_del(&qdma->q_tx_irq[i].napi); } - for (i = 0; i < ARRAY_SIZE(eth->q_tx); i++) { - if (!eth->q_tx[i].ndesc) + for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { + if (!qdma->q_tx[i].ndesc) continue; - airoha_qdma_cleanup_tx_queue(&eth->q_tx[i]); + airoha_qdma_cleanup_tx_queue(&qdma->q_tx[i]); } } -static void airoha_qdma_start_napi(struct airoha_eth *eth) +static void airoha_qdma_start_napi(struct airoha_qdma *qdma) { int i; - for (i = 0; i < ARRAY_SIZE(eth->q_tx_irq); i++) - napi_enable(&eth->q_tx_irq[i].napi); + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) + napi_enable(&qdma->q_tx_irq[i].napi); - for (i = 0; i < ARRAY_SIZE(eth->q_rx); i++) { - if (!eth->q_rx[i].ndesc) + for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) continue; - napi_enable(&eth->q_rx[i].napi); + napi_enable(&qdma->q_rx[i].napi); } } static void airoha_update_hw_stats(struct airoha_gdm_port *port) { - struct airoha_eth *eth = port->eth; + struct airoha_eth *eth = port->qdma->eth; u32 val, i = 0; spin_lock(&port->stats.lock); @@ -2247,23 +2305,24 @@ static void airoha_update_hw_stats(struct airoha_gdm_port *port) static int airoha_dev_open(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); - struct airoha_eth *eth = port->eth; + struct airoha_qdma *qdma = port->qdma; int err; netif_tx_start_all_queues(dev); - err = airoha_set_gdm_ports(eth, true); + err = airoha_set_gdm_ports(qdma->eth, true); if (err) return err; if (netdev_uses_dsa(dev)) - airoha_fe_set(eth, REG_GDM_INGRESS_CFG(port->id), + airoha_fe_set(qdma->eth, REG_GDM_INGRESS_CFG(port->id), GDM_STAG_EN_MASK); else - airoha_fe_clear(eth, REG_GDM_INGRESS_CFG(port->id), + airoha_fe_clear(qdma->eth, REG_GDM_INGRESS_CFG(port->id), GDM_STAG_EN_MASK); - airoha_qdma_set(eth, REG_QDMA_GLOBAL_CFG, GLOBAL_CFG_TX_DMA_EN_MASK); - airoha_qdma_set(eth, REG_QDMA_GLOBAL_CFG, GLOBAL_CFG_RX_DMA_EN_MASK); + airoha_qdma_set(qdma, REG_QDMA_GLOBAL_CFG, + GLOBAL_CFG_TX_DMA_EN_MASK | + GLOBAL_CFG_RX_DMA_EN_MASK); return 0; } @@ -2271,16 +2330,17 @@ static int airoha_dev_open(struct net_device *dev) static int airoha_dev_stop(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); - struct airoha_eth *eth = port->eth; + struct airoha_qdma *qdma = port->qdma; int err; netif_tx_disable(dev); - err = airoha_set_gdm_ports(eth, false); + err = airoha_set_gdm_ports(qdma->eth, false); if (err) return err; - airoha_qdma_clear(eth, REG_QDMA_GLOBAL_CFG, GLOBAL_CFG_TX_DMA_EN_MASK); - airoha_qdma_clear(eth, REG_QDMA_GLOBAL_CFG, GLOBAL_CFG_RX_DMA_EN_MASK); + airoha_qdma_clear(qdma, REG_QDMA_GLOBAL_CFG, + GLOBAL_CFG_TX_DMA_EN_MASK | + GLOBAL_CFG_RX_DMA_EN_MASK); return 0; } @@ -2294,7 +2354,7 @@ static int airoha_dev_set_macaddr(struct net_device *dev, void *p) if (err) return err; - airoha_set_macaddr(port->eth, dev->dev_addr); + airoha_set_macaddr(port, dev->dev_addr); return 0; }
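airoha_dev_open()/airoha_dev_stop() above also fold the separate TX and RX DMA-enable updates into a single read-modify-write of REG_QDMA_GLOBAL_CFG. The equivalent helper, sketched with the mask names from the hunks (qdma_dma_toggle itself is illustrative):

static void qdma_dma_toggle(struct airoha_qdma *qdma, bool enable)
{
	u32 mask = GLOBAL_CFG_TX_DMA_EN_MASK | GLOBAL_CFG_RX_DMA_EN_MASK;

	if (enable)
		airoha_qdma_set(qdma, REG_QDMA_GLOBAL_CFG, mask);
	else
		airoha_qdma_clear(qdma, REG_QDMA_GLOBAL_CFG, mask);
}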
@@ -2303,7 +2363,7 @@ static int airoha_dev_init(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); - airoha_set_macaddr(port->eth, dev->dev_addr); + airoha_set_macaddr(port, dev->dev_addr); return 0; } @@ -2337,7 +2397,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct airoha_gdm_port *port = netdev_priv(dev); u32 msg0 = 0, msg1, len = skb_headlen(skb); int i, qid = skb_get_queue_mapping(skb); - struct airoha_eth *eth = port->eth; + struct airoha_qdma *qdma = port->qdma; u32 nr_frags = 1 + sinfo->nr_frags; struct netdev_queue *txq; struct airoha_queue *q; @@ -2367,7 +2427,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, msg1 = FIELD_PREP(QDMA_ETH_TXMSG_FPORT_MASK, fport) | FIELD_PREP(QDMA_ETH_TXMSG_METER_MASK, 0x7f); - q = &eth->q_tx[qid]; + q = &qdma->q_tx[qid]; if (WARN_ON_ONCE(!q->ndesc)) goto error; @@ -2411,7 +2471,8 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, e->dma_addr = addr; e->dma_len = len; - airoha_qdma_rmw(eth, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, + airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), + TX_RING_CPU_IDX_MASK, FIELD_PREP(TX_RING_CPU_IDX_MASK, index)); data = skb_frag_address(frag); @@ -2448,7 +2509,7 @@ static void airoha_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct airoha_gdm_port *port = netdev_priv(dev); - struct airoha_eth *eth = port->eth; + struct airoha_eth *eth = port->qdma->eth; strscpy(info->driver, eth->dev->driver->name, sizeof(info->driver)); strscpy(info->bus_info, dev_name(eth->dev), sizeof(info->bus_info)); @@ -2529,6 +2590,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, struct device_node *np) { const __be32 *id_ptr = of_get_property(np, "reg", NULL); struct airoha_gdm_port *port; + struct airoha_qdma *qdma; struct net_device *dev; int err, index; u32 id; @@ -2558,6 +2620,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, struct device_node *np) return -ENOMEM; } + qdma = &eth->qdma[index % AIROHA_MAX_NUM_QDMA]; dev->netdev_ops = &airoha_netdev_ops; dev->ethtool_ops = &airoha_ethtool_ops; dev->max_mtu = AIROHA_MAX_MTU; @@ -2567,6 +2630,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, struct device_node *np) NETIF_F_SG | NETIF_F_TSO; dev->features |= dev->hw_features; dev->dev.of_node = np; + dev->irq = qdma->irq; SET_NETDEV_DEV(dev, eth->dev); err = of_get_ethdev_address(np, dev); @@ -2582,8 +2646,8 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, struct device_node *np) port = netdev_priv(dev); u64_stats_init(&port->stats.syncp); spin_lock_init(&port->stats.lock); + port->qdma = qdma; port->dev = dev; - port->eth = eth; port->id = id; eth->ports[index] = port; @@ -2613,11 +2677,6 @@ static int airoha_probe(struct platform_device *pdev) return dev_err_probe(eth->dev, PTR_ERR(eth->fe_regs), "failed to iomap fe regs\n"); - eth->qdma_regs = devm_platform_ioremap_resource_byname(pdev, "qdma0"); - if (IS_ERR(eth->qdma_regs)) - return dev_err_probe(eth->dev, PTR_ERR(eth->qdma_regs), - "failed to iomap qdma regs\n"); - eth->rsts[0].id = "fe"; eth->rsts[1].id = "pdma"; eth->rsts[2].id = "qdma"; @@ -2642,11 +2701,6 @@ static int airoha_probe(struct platform_device *pdev) return err; } - spin_lock_init(&eth->irq_lock); - eth->irq = platform_get_irq(pdev, 0); - if (eth->irq < 0) - return eth->irq; - eth->napi_dev = alloc_netdev_dummy(0); if (!eth->napi_dev) return -ENOMEM; @@ -2656,11 +2710,13 @@ static int airoha_probe(struct platform_device *pdev) strscpy(eth->napi_dev->name, "qdma_eth", sizeof(eth->napi_dev->name)); platform_set_drvdata(pdev, 
eth); - err = airoha_hw_init(eth); + err = airoha_hw_init(pdev, eth); if (err) goto error; - airoha_qdma_start_napi(eth); + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) + airoha_qdma_start_napi(&eth->qdma[i]); + for_each_child_of_node(pdev->dev.of_node, np) { if (!of_device_is_compatible(np, "airoha,eth-mac")) continue; @@ -2678,7 +2734,9 @@ static int airoha_probe(struct platform_device *pdev) return 0; error: - airoha_hw_cleanup(eth); + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) + airoha_hw_cleanup(&eth->qdma[i]); + for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; @@ -2696,7 +2754,9 @@ static void airoha_remove(struct platform_device *pdev) struct airoha_eth *eth = platform_get_drvdata(pdev); int i; - airoha_hw_cleanup(eth); + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) + airoha_hw_cleanup(&eth->qdma[i]); + for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; @@ -2715,6 +2775,7 @@ static const struct of_device_id of_airoha_match[] = { { .compatible = "airoha,en7581-eth" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, of_airoha_match); static struct platform_driver airoha_driver = { .probe = airoha_probe, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index eb1708b43aa3..0d5225f1d3ee 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -724,12 +724,8 @@ enum mtk_clks_map { MTK_CLK_ETHWARP_WOCPU2, MTK_CLK_ETHWARP_WOCPU1, MTK_CLK_ETHWARP_WOCPU0, - MTK_CLK_TOP_USXGMII_SBUS_0_SEL, - MTK_CLK_TOP_USXGMII_SBUS_1_SEL, MTK_CLK_TOP_SGM_0_SEL, MTK_CLK_TOP_SGM_1_SEL, - MTK_CLK_TOP_XFI_PHY_0_XTAL_SEL, - MTK_CLK_TOP_XFI_PHY_1_XTAL_SEL, MTK_CLK_TOP_ETH_GMII_SEL, MTK_CLK_TOP_ETH_REFCK_50M_SEL, MTK_CLK_TOP_ETH_SYS_200M_SEL, @@ -800,19 +796,9 @@ enum mtk_clks_map { BIT_ULL(MTK_CLK_GP3) | BIT_ULL(MTK_CLK_XGP1) | \ BIT_ULL(MTK_CLK_XGP2) | BIT_ULL(MTK_CLK_XGP3) | \ BIT_ULL(MTK_CLK_CRYPTO) | \ - BIT_ULL(MTK_CLK_SGMII_TX_250M) | \ - BIT_ULL(MTK_CLK_SGMII_RX_250M) | \ - BIT_ULL(MTK_CLK_SGMII2_TX_250M) | \ - BIT_ULL(MTK_CLK_SGMII2_RX_250M) | \ BIT_ULL(MTK_CLK_ETHWARP_WOCPU2) | \ BIT_ULL(MTK_CLK_ETHWARP_WOCPU1) | \ BIT_ULL(MTK_CLK_ETHWARP_WOCPU0) | \ - BIT_ULL(MTK_CLK_TOP_USXGMII_SBUS_0_SEL) | \ - BIT_ULL(MTK_CLK_TOP_USXGMII_SBUS_1_SEL) | \ - BIT_ULL(MTK_CLK_TOP_SGM_0_SEL) | \ - BIT_ULL(MTK_CLK_TOP_SGM_1_SEL) | \ - BIT_ULL(MTK_CLK_TOP_XFI_PHY_0_XTAL_SEL) | \ - BIT_ULL(MTK_CLK_TOP_XFI_PHY_1_XTAL_SEL) | \ BIT_ULL(MTK_CLK_TOP_ETH_GMII_SEL) | \ BIT_ULL(MTK_CLK_TOP_ETH_REFCK_50M_SEL) | \ BIT_ULL(MTK_CLK_TOP_ETH_SYS_200M_SEL) | \ diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 0acee405a749..ada852adc5f7 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -8,8 +8,11 @@ #include <linux/platform_device.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> + #include <net/dst_metadata.h> #include <net/dsa.h> +#include <net/ipv6.h> + #include "mtk_eth_soc.h" #include "mtk_ppe.h" #include "mtk_ppe_regs.h" @@ -338,7 +341,6 @@ int mtk_foe_entry_set_ipv6_tuple(struct mtk_eth *eth, { int type = mtk_get_ib1_pkt_type(eth, entry->ib1); u32 *src, *dest; - int i; switch (type) { case MTK_PPE_PKT_TYPE_IPV4_DSLITE: @@ -359,10 +361,8 @@ int mtk_foe_entry_set_ipv6_tuple(struct mtk_eth *eth, return -EINVAL; } - for (i = 0; i < 4; i++) - src[i] = be32_to_cpu(src_addr[i]); - for (i = 0; i < 4; i++) - dest[i] = be32_to_cpu(dest_addr[i]); + 
ipv6_addr_be32_to_cpu(src, src_addr); + ipv6_addr_be32_to_cpu(dest, dest_addr); return 0; } diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c index 1a97feca77f2..570ebf91f693 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c @@ -3,6 +3,9 @@ #include <linux/kernel.h> #include <linux/debugfs.h> + +#include <net/ipv6.h> + #include "mtk_eth_soc.h" struct mtk_flow_addr_info @@ -47,16 +50,14 @@ static const char *mtk_foe_pkt_type_str(int type) static void mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6) { - __be32 n_addr[4]; - int i; + __be32 n_addr[IPV6_ADDR_WORDS]; if (!ipv6) { seq_printf(m, "%pI4h", addr); return; } - for (i = 0; i < ARRAY_SIZE(n_addr); i++) - n_addr[i] = htonl(addr[i]); + ipv6_addr_cpu_to_be32(n_addr, addr); seq_printf(m, "%pI6", n_addr); } diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 61334a71058c..e212a4ba9275 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -2666,14 +2666,15 @@ mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri { struct mtk_wed_flow_block_priv *priv = cb_priv; struct flow_cls_offload *cls = type_data; - struct mtk_wed_hw *hw = priv->hw; + struct mtk_wed_hw *hw = NULL; - if (!tc_can_offload(priv->dev)) + if (!priv || !tc_can_offload(priv->dev)) return -EOPNOTSUPP; if (type != TC_SETUP_CLSFLOWER) return -EOPNOTSUPP; + hw = priv->hw; return mtk_flow_offload_cmd(hw->eth, cls, hw->index); } @@ -2729,6 +2730,7 @@ mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev, flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); kfree(block_cb->cb_priv); + block_cb->cb_priv = NULL; } return 0; default: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 943d6918c2ec..cd17a3f4faf8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2036,20 +2036,20 @@ static int mlx4_en_get_module_info(struct net_device *dev, switch (data[0] /* identifier */) { case MLX4_MODULE_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; break; case MLX4_MODULE_ID_QSFP_PLUS: if (data[1] >= 0x3) { /* revision id */ modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; } else { modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; } break; case MLX4_MODULE_ID_QSFP28: modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 685335832a93..ea6070180c96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -172,6 +172,16 @@ config MLX5_SW_STEERING help Build support for software-managed steering in the NIC. 
+config MLX5_HW_STEERING + bool "Mellanox Technologies hardware-managed steering" + depends on MLX5_CORE_EN && MLX5_ESWITCH + default y + help + Build support for Hardware-Managed Flow Steering (HMFS) in the NIC. + HMFS is a new approach to managing steering rules where STEs are + written to ICM by HW (as opposed to SW in software-managed steering), + which allows higher rate of rule insertion. + config MLX5_SF bool "Mellanox Technologies subfunction device support using auxiliary device" depends on MLX5_CORE && MLX5_CORE_EN diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 1289475e7be7..5912f7e614f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -119,6 +119,27 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_action.o steering/fs_dr.o \ steering/dr_definer.o steering/dr_ptrn.o \ steering/dr_arg.o steering/dr_dbg.o lib/smfs.o + +# +# HW Steering +# +mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/mlx5hws_cmd.o \ + steering/hws/mlx5hws_context.o \ + steering/hws/mlx5hws_pat_arg.o \ + steering/hws/mlx5hws_buddy.o \ + steering/hws/mlx5hws_pool.o \ + steering/hws/mlx5hws_table.o \ + steering/hws/mlx5hws_action.o \ + steering/hws/mlx5hws_rule.o \ + steering/hws/mlx5hws_matcher.o \ + steering/hws/mlx5hws_send.o \ + steering/hws/mlx5hws_definer.o \ + steering/hws/mlx5hws_bwc.o \ + steering/hws/mlx5hws_debug.o \ + steering/hws/mlx5hws_vport.o \ + steering/hws/mlx5hws_bwc_complex.o + + # # SF device # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 20768ef2e9d2..a64d96effb9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -754,6 +754,8 @@ static const char *cmd_status_str(u8 status) return "bad resource"; case MLX5_CMD_STAT_RES_BUSY: return "resource busy"; + case MLX5_CMD_STAT_NOT_READY: + return "FW not ready"; case MLX5_CMD_STAT_LIM_ERR: return "limits exceeded"; case MLX5_CMD_STAT_BAD_RES_STATE_ERR: @@ -787,6 +789,7 @@ static int cmd_status_to_err(u8 status) case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_NOT_READY: return -EAGAIN; case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; case MLX5_CMD_STAT_IX_ERR: return -EINVAL; @@ -815,14 +818,16 @@ EXPORT_SYMBOL(mlx5_cmd_out_err); static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { u16 opcode, op_mod; + u8 status; u16 uid; opcode = in_to_opcode(in); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); + status = MLX5_GET(mbox_out, out, status); if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY && - opcode != MLX5_CMD_OP_CREATE_UCTX) + opcode != MLX5_CMD_OP_CREATE_UCTX && status != MLX5_CMD_STAT_NOT_READY) mlx5_cmd_out_err(dev, opcode, op_mod, out); } @@ -1882,10 +1887,12 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, throttle_op = mlx5_cmd_is_throttle_opcode(opcode); if (throttle_op) { - /* atomic context may not sleep */ - if (callback) - return -EINVAL; - down(&dev->cmd.vars.throttle_sem); + if (callback) { + if (down_trylock(&dev->cmd.vars.throttle_sem)) + return -EBUSY; + } else { + down(&dev->cmd.vars.throttle_sem); + } } pages_queue = is_manage_pages(in); @@ -2091,10 +2098,19 @@ 
static void mlx5_cmd_exec_cb_handler(int status, void *_work) { struct mlx5_async_work *work = _work; struct mlx5_async_ctx *ctx; + struct mlx5_core_dev *dev; + u16 opcode; ctx = work->ctx; - status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); + dev = ctx->dev; + opcode = work->opcode; + status = cmd_status_err(dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); + /* Can't access "work" from this point on. It could have been freed in + * the callback. + */ + if (mlx5_cmd_is_throttle_opcode(opcode)) + up(&dev->cmd.vars.throttle_sem); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index ddf1b87f1bc0..9aed29fa4900 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -203,10 +203,10 @@ TRACE_EVENT(mlx5_fs_set_fte, fs_get_obj(__entry->fg, fte->node.parent); __entry->group_index = __entry->fg->id; __entry->index = fte->index; - __entry->action = fte->action.action; + __entry->action = fte->act_dests.action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->flow_context.flow_tag; - __entry->flow_source = fte->flow_context.flow_source; + __entry->flow_tag = fte->act_dests.flow_context.flow_tag; + __entry->flow_source = fte->act_dests.flow_context.flow_source; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, @@ -284,7 +284,7 @@ TRACE_EVENT(mlx5_fs_add_rule, TP_fast_assign( __entry->rule = rule; fs_get_obj(__entry->fte, rule->node.parent); - __entry->index = __entry->fte->dests_size - 1; + __entry->index = __entry->fte->act_dests.dests_size - 1; __entry->sw_action = rule->sw_action; memcpy(__entry->destination, &rule->dest_attr, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5fd82c67b6ab..da0a1c65ec4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -130,7 +130,7 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 #define MLX5E_DEFAULT_LRO_TIMEOUT 32 -#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 +#define MLX5E_DEFAULT_SHAMPO_TIMEOUT 1024 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 @@ -998,6 +998,7 @@ void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode); +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len); void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); @@ -1172,14 +1173,16 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param, struct kernel_ethtool_ringparam *kernel_param); int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, - struct ethtool_ringparam *param); + struct ethtool_ringparam *param, + struct netlink_ext_ack *extack); void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch); int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch); int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct 
ethtool_coalesce *coal, - struct kernel_ethtool_coalesce *kernel_coal); + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack); int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 6c9ccccca81e..64b62ed17b07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -928,7 +928,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_headers_entry_size, mlx5e_shampo_get_log_hd_entry_size(mdev, params)); MLX5_SET(rqc, rqc, reservation_timeout, - params->packet_merge.timeout); + mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); MLX5_SET(rqc, rqc, shampo_match_criteria_type, params->packet_merge.shampo.match_criteria_type); MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, @@ -1087,6 +1087,20 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, return wqebbs; } +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 749b2ec0436e..3f8986f9d862 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -108,6 +108,7 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 22918b2ef7f1..09433b91be17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -146,7 +146,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } + mutex_lock(&priv->state_lock); err = mlx5e_safe_reopen_channels(priv); + mutex_unlock(&priv->state_lock); if (!err) { to_ctx->status = 1; /* all channels recovered */ return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h index bb6b1a979ba1..62b3f7ff5562 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h @@ -25,6 +25,8 @@ struct mlx5_ct_fs_ops { struct mlx5_flow_attr *attr, struct flow_rule *flow_rule); void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule); + int (*ct_rule_update)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr); size_t priv_size; };
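mlx5e_choose_lro_timeout(), moved into params.c a few hunks above, walks the ascending lro_timer_supported_periods[] capability array and returns the first supported period that is at least the requested one, clamping to the last entry otherwise. The same selection on a plain array (supported[] stands in for the firmware capability; choose_timeout is illustrative):

static u32 choose_timeout(const u32 *supported, int n, u32 wanted)
{
	int i;

	/* supported[] is sorted ascending, like the device capability */
	for (i = 0; i < n - 1; i++)
		if (supported[i] >= wanted)
			break;

	return supported[i];	/* falls back to supported[n - 1] */
}

With supported = {8, 64, 512, 1024}, wanted = 600 picks 1024, and wanted = 4096 also returns 1024, the largest available period.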
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c index ae4f55be48ce..64a82aafaaca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c @@ -65,9 +65,30 @@ mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_ru kfree(dmfs_rule); } +static int mlx5_ct_fs_dmfs_ct_rule_update(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) +{ + struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_dmfs_rule, + fs_rule); + struct mlx5e_priv *priv = netdev_priv(fs->netdev); + struct mlx5_flow_handle *rule; + + rule = mlx5_tc_rule_insert(priv, spec, attr); + if (IS_ERR(rule)) + return PTR_ERR(rule); + mlx5_tc_rule_delete(priv, dmfs_rule->rule, dmfs_rule->attr); + + dmfs_rule->rule = rule; + dmfs_rule->attr = attr; + + return 0; +} + static struct mlx5_ct_fs_ops dmfs_ops = { .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add, .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del, + .ct_rule_update = mlx5_ct_fs_dmfs_ct_rule_update, .init = mlx5_ct_fs_dmfs_init, .destroy = mlx5_ct_fs_dmfs_destroy, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 8c531f4ec912..1c062a2e8996 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -368,9 +368,35 @@ mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_ru kfree(smfs_rule); } +static int mlx5_ct_fs_smfs_ct_rule_update(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) +{ + struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_smfs_rule, + fs_rule); + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5dr_action *actions[3]; /* We only need to create 3 actions, see below. 
*/ + struct mlx5dr_rule *rule; + + actions[0] = smfs_rule->count_action; + actions[1] = attr->modify_hdr->action.dr_action; + actions[2] = fs_smfs->fwd_action; + + rule = mlx5_smfs_rule_create(smfs_rule->smfs_matcher->dr_matcher, spec, + ARRAY_SIZE(actions), actions, spec->flow_context.flow_source); + if (!rule) + return -EINVAL; + + mlx5_smfs_rule_destroy(smfs_rule->rule); + smfs_rule->rule = rule; + + return 0; +} + static struct mlx5_ct_fs_ops fs_smfs_ops = { .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add, .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del, + .ct_rule_update = mlx5_ct_fs_smfs_ct_rule_update, .init = mlx5_ct_fs_smfs_init, .destroy = mlx5_ct_fs_smfs_destroy, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 8cf8ba2622f2..dcfccaaa8d91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -876,15 +876,14 @@ err_attr: } static int -mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv, - struct flow_rule *flow_rule, - struct mlx5_ct_entry *entry, - bool nat, u8 zone_restore_id) +mlx5_tc_ct_entry_update_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat, u8 zone_restore_id) { struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr; struct mlx5e_mod_hdr_handle *mh; - struct mlx5_ct_fs_rule *rule; struct mlx5_flow_spec *spec; int err; @@ -902,29 +901,26 @@ mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv, err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id, nat, mlx5_tc_ct_entry_in_ct_nat_table(entry)); if (err) { - ct_dbg("Failed to create ct entry mod hdr"); + ct_dbg("Failed to create ct entry mod hdr, err: %d", err); goto err_mod_hdr; } mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); - rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - ct_dbg("Failed to add replacement ct entry rule, nat: %d", nat); + err = ct_priv->fs_ops->ct_rule_update(ct_priv->fs, zone_rule->rule, spec, attr); + if (err) { + ct_dbg("Failed to update ct entry rule, nat: %d, err: %d", nat, err); goto err_rule; } - ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); - zone_rule->rule = rule; mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh); zone_rule->mh = mh; mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id); kfree(old_attr); kvfree(spec); - ct_dbg("Replaced ct entry rule in zone %d", entry->tuple.zone); + ct_dbg("Updated ct entry rule in zone %d", entry->tuple.zone); return 0; @@ -932,6 +928,7 @@ err_rule: mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: + *attr = *old_attr; kfree(old_attr); err_attr: kvfree(spec); @@ -1140,23 +1137,23 @@ err_orig: } static int -mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv, - struct flow_rule *flow_rule, - struct mlx5_ct_entry *entry, - u8 zone_restore_id) +mlx5_tc_ct_entry_update_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + u8 zone_restore_id) { int err = 0; if (mlx5_tc_ct_entry_in_ct_table(entry)) { - err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false, - zone_restore_id); + err = 
mlx5_tc_ct_entry_update_rule(ct_priv, flow_rule, entry, false, + zone_restore_id); if (err) return err; } if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) { - err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true, - zone_restore_id); + err = mlx5_tc_ct_entry_update_rule(ct_priv, flow_rule, entry, true, + zone_restore_id); if (err && mlx5_tc_ct_entry_in_ct_table(entry)) mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); } @@ -1164,13 +1161,13 @@ mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv, } static int -mlx5_tc_ct_block_flow_offload_replace(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule, - struct mlx5_ct_entry *entry, unsigned long cookie) +mlx5_tc_ct_block_flow_offload_update(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, unsigned long cookie) { struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; int err; - err = mlx5_tc_ct_entry_replace_rules(ct_priv, flow_rule, entry, ft->zone_restore_id); + err = mlx5_tc_ct_entry_update_rules(ct_priv, flow_rule, entry, ft->zone_restore_id); if (!err) return 0; @@ -1215,7 +1212,7 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, entry->restore_cookie = meta_action->ct_metadata.cookie; spin_unlock_bh(&ct_priv->ht_lock); - err = mlx5_tc_ct_block_flow_offload_replace(ft, flow_rule, entry, cookie); + err = mlx5_tc_ct_block_flow_offload_update(ft, flow_rule, entry, cookie); mlx5_tc_ct_entry_put(entry); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 6cc23af66b5b..efb34de4cb7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -109,6 +109,7 @@ struct mlx5e_tc_flow { struct completion init_done; struct completion del_hw_done; struct mlx5_flow_attr *attr; + struct mlx5_flow_attr *extra_split_attr; struct list_head attrs; u32 chain_mapping; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 6e00afe4671b..53cfa39188cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -51,9 +51,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; - if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || - MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + if (IS_ENABLED(CONFIG_MLX5_CLS_ACT) && + ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))) caps |= MLX5_IPSEC_CAP_PRIO; if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, @@ -126,6 +127,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, attrs->lft.hard_packet_limit); MLX5_SET(ipsec_aso, aso_ctx, hard_lft_arm, 1); + MLX5_SET(ipsec_aso, aso_ctx, remove_flow_enable, 1); } if (attrs->lft.soft_packet_limit != XFRM_INF) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 00d5661dc62e..1966736f98b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -83,17 +83,15 @@ struct ptys2ethtool_config 
ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER]; ({ \ struct ptys2ethtool_config *cfg; \ const unsigned int modes[] = { __VA_ARGS__ }; \ - unsigned int i, bit, idx; \ + unsigned int i; \ cfg = &ptys2##table##_ethtool_table[reg_]; \ bitmap_zero(cfg->supported, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ bitmap_zero(cfg->advertised, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ - bit = modes[i] % 64; \ - idx = modes[i] / 64; \ - __set_bit(bit, &cfg->supported[idx]); \ - __set_bit(bit, &cfg->advertised[idx]); \ + bitmap_set(cfg->supported, modes[i], 1); \ + bitmap_set(cfg->advertised, modes[i], 1); \ } \ }) @@ -139,6 +137,10 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100BASE_TX, legacy, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_T, legacy, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT); MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy, ETHTOOL_LINK_MODE_10000baseT_Full_BIT); MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy, @@ -204,6 +206,12 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_8_400GBASE_CR8, ext, + ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT); MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext, ETHTOOL_LINK_MODE_100000baseKR_Full_BIT, ETHTOOL_LINK_MODE_100000baseSR_Full_BIT, @@ -354,35 +362,25 @@ static void mlx5e_get_ringparam(struct net_device *dev, } int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct netlink_ext_ack *extack) { struct mlx5e_params new_params; u8 log_rq_size; u8 log_sq_size; int err = 0; - if (param->rx_jumbo_pending) { - netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n", - __func__); - return -EINVAL; - } - if (param->rx_mini_pending) { - netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n", - __func__); - return -EINVAL; - } - if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { - netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n", - __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + NL_SET_ERR_MSG_FMT_MOD(extack, "rx (%d) < min (%d)", + param->rx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); return -EINVAL; } if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { - netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n", - __func__, param->tx_pending, - 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); + NL_SET_ERR_MSG_FMT_MOD(extack, "tx (%d) < min (%d)", + param->tx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); return -EINVAL; } @@ -418,7 +416,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); - return mlx5e_ethtool_set_ringparam(priv, param); + return mlx5e_ethtool_set_ringparam(priv, param, extack); } void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, @@ -445,7 +443,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, unsigned int count = ch->combined_count; struct mlx5e_params 
new_params; bool arfs_enabled; - int rss_cnt; bool opened; int err = 0; @@ -499,17 +496,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } - /* Don't allow changing the number of channels if non-default RSS contexts exist, - * the kernel doesn't protect against set_channels operations that break them. - */ - rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1; - if (rss_cnt) { - err = -EINVAL; - netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n", - __func__, rss_cnt); - goto out; - } - /* Don't allow changing the number of channels if MQPRIO mode channel offload is active, * because it defines a partition over the channels queues. */ @@ -557,12 +543,15 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal, - struct kernel_ethtool_coalesce *kernel_coal) + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct dim_cq_moder *rx_moder, *tx_moder; - if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) { + NL_SET_ERR_MSG_MOD(extack, "CQ moderation not supported"); return -EOPNOTSUPP; + } rx_moder = &priv->channels.params.rx_cq_moderation; coal->rx_coalesce_usecs = rx_moder->usec; @@ -586,7 +575,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal, extack); } static int mlx5e_ethtool_get_per_queue_coalesce(struct mlx5e_priv *priv, u32 queue, @@ -708,26 +697,34 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, int err = 0; if (!MLX5_CAP_GEN(mdev, cq_moderation) || - !MLX5_CAP_GEN(mdev, cq_period_mode_modify)) + !MLX5_CAP_GEN(mdev, cq_period_mode_modify)) { + NL_SET_ERR_MSG_MOD(extack, "CQ moderation not supported"); return -EOPNOTSUPP; + } if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME || coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) { - netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n", - __func__, MLX5E_MAX_COAL_TIME); + NL_SET_ERR_MSG_FMT_MOD( + extack, + "Max coalesce time %lu usecs, tx-usecs (%u) rx-usecs (%u)", + MLX5E_MAX_COAL_TIME, coal->tx_coalesce_usecs, + coal->rx_coalesce_usecs); return -ERANGE; } if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES || coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) { - netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n", - __func__, MLX5E_MAX_COAL_FRAMES); + NL_SET_ERR_MSG_FMT_MOD( + extack, + "Max coalesce frames %lu, tx-frames (%u) rx-frames (%u)", + MLX5E_MAX_COAL_FRAMES, coal->tx_max_coalesced_frames, + coal->rx_max_coalesced_frames); return -ERANGE; } if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) { - NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device"); + NL_SET_ERR_MSG_MOD(extack, "cqe-mode-rx/tx is not supported on this device"); return -EOPNOTSUPP; } @@ -1299,7 +1296,8 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) u32 i, ptys_modes = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (*ptys2legacy_ethtool_table[i].advertised == 0) + if (bitmap_empty(ptys2legacy_ethtool_table[i].advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS)) continue; if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised, link_modes, @@ -1313,18 
@@ -1313,18 +1311,18 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
 static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes)
 {
 	u32 i, ptys_modes = 0;
-	unsigned long modes[2];
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(modes);
 
 	for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) {
-		if (ptys2ext_ethtool_table[i].advertised[0] == 0 &&
-		    ptys2ext_ethtool_table[i].advertised[1] == 0)
+		if (bitmap_empty(ptys2ext_ethtool_table[i].advertised,
+				 __ETHTOOL_LINK_MODE_MASK_NBITS))
 			continue;
-		memset(modes, 0, sizeof(modes));
+		bitmap_zero(modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
 		bitmap_and(modes, ptys2ext_ethtool_table[i].advertised,
 			   link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
 
-		if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] &&
-		    modes[1] == ptys2ext_ethtool_table[i].advertised[1])
+		if (bitmap_equal(modes, ptys2ext_ethtool_table[i].advertised,
+				 __ETHTOOL_LINK_MODE_MASK_NBITS))
 			ptys_modes |= MLX5E_PROT_MASK(i);
 	}
 	return ptys_modes;
@@ -1409,7 +1407,12 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 	if (!an_changes && link_modes == eproto.admin)
 		goto out;
 
-	mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext);
+	err = mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext);
+	if (err) {
+		netdev_err(priv->netdev, "%s: failed to set ptys reg: %d\n", __func__, err);
+		goto out;
+	}
+
 	mlx5_toggle_port_link(mdev);
 
 out:
@@ -2010,8 +2013,10 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev,
 		if (size_read == -EINVAL)
 			return -EINVAL;
 		if (size_read < 0) {
-			netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n",
-				   __func__, size_read);
+			NL_SET_ERR_MSG_FMT_MOD(
+				extack,
+				"Query module eeprom by page failed, read %u bytes, err %d\n",
+				i, size_read);
 			return i;
 		}
 
@@ -2600,6 +2605,7 @@ static void mlx5e_get_ts_stats(struct net_device *netdev,
 
 const struct ethtool_ops mlx5e_ethtool_ops = {
 	.cap_rss_ctx_supported	= true,
+	.rxfh_per_ctx_key	= true,
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES |
 				     ETHTOOL_COALESCE_USE_ADAPTIVE |
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 3eccdadc0357..773624bb2c5d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -734,7 +734,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
 	if (num_tuples <= 0) {
 		netdev_warn(priv->netdev, "%s: flow is not valid %d\n",
 			    __func__, num_tuples);
-		return num_tuples;
+		return num_tuples < 0 ? num_tuples : -EINVAL;
 	}
 
 	eth_ft = get_flow_table(priv, fs, num_tuples);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6f686fabed44..a5659c0c4236 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1016,30 +1016,31 @@ err_rq_xdp_prog:
 
 static void mlx5e_free_rq(struct mlx5e_rq *rq)
 {
-	struct bpf_prog *old_prog;
-
-	if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
-		old_prog = rcu_dereference_protected(rq->xdp_prog,
-						     lockdep_is_held(&rq->priv->state_lock));
-		if (old_prog)
-			bpf_prog_put(old_prog);
-	}
+	kvfree(rq->dim);
+	page_pool_destroy(rq->page_pool);
 
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		mlx5e_rq_free_shampo(rq);
 		kvfree(rq->mpwqe.info);
 		mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
 		mlx5e_free_mpwqe_rq_drop_page(rq);
-		mlx5e_rq_free_shampo(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		mlx5e_free_wqe_alloc_info(rq);
 	}
 
-	kvfree(rq->dim);
-	xdp_rxq_info_unreg(&rq->xdp_rxq);
-	page_pool_destroy(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
+
+	if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
+		struct bpf_prog *old_prog;
+
+		old_prog = rcu_dereference_protected(rq->xdp_prog,
+						     lockdep_is_held(&rq->priv->state_lock));
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+	xdp_rxq_info_unreg(&rq->xdp_rxq);
 }
 
 int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter)
@@ -1236,6 +1237,14 @@ void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq)
 	rq->mpwqe.actual_wq_head = wq->head;
 	rq->mpwqe.umr_in_progress = 0;
 	rq->mpwqe.umr_completed = 0;
+
+	if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
+		struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+		u16 len;
+
+		len = (shampo->pi - shampo->ci) & shampo->hd_per_wq;
+		mlx5e_shampo_fill_umr(rq, len);
+	}
 }
 
 void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
@@ -3020,15 +3029,18 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
 static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
 					   struct mlx5e_params *params)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
-	int num_comp_vectors, ix, irq;
-
-	num_comp_vectors = mlx5_comp_vectors_max(mdev);
+	int ix;
 
 	for (ix = 0; ix < params->num_channels; ix++) {
+		int num_comp_vectors, irq, vec_ix;
+		struct mlx5_core_dev *mdev;
+
+		mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix);
+		num_comp_vectors = mlx5_comp_vectors_max(mdev);
 		cpumask_clear(priv->scratchpad.cpumask);
+		vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix);
 
-		for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
+		for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) {
 			int cpu = mlx5_comp_vector_get_cpu(mdev, irq);
 
 			cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
@@ -4403,9 +4415,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
 
 	if (mlx5e_is_uplink_rep(priv)) {
 		features = mlx5e_fix_uplink_rep_features(netdev, features);
-		features |= NETIF_F_NETNS_LOCAL;
+		netdev->netns_local = true;
 	} else {
-		features &= ~NETIF_F_NETNS_LOCAL;
+		netdev->netns_local = false;
 	}
 
 	mutex_unlock(&priv->state_lock);
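The en_fs_ethtool.c one-liner above closes a small hole: num_tuples <= 0 took the error path, but a value of exactly 0 was then returned as 0, which callers read as success. Normalizing non-negative "invalid" values to -EINVAL keeps real errnos intact. The shape of the fix in isolation (names illustrative):

/* Called only on the n <= 0 error path: pass real errnos through,
 * and turn the "zero items" case into an error as well. */
static int toy_normalize_err(int n)
{
	return n < 0 ? n : -EINVAL;
}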
@@ -5167,18 +5179,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
 #endif
 };
 
-static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
-{
-	int i;
-
-	/* The supported periods are organized in ascending order */
-	for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
-		if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
-			break;
-
-	return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
-}
-
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
 {
 	struct mlx5e_params *params = &priv->channels.params;
@@ -5308,7 +5308,7 @@ static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i,
 	struct mlx5e_rq_stats *rq_stats;
 
 	ASSERT_RTNL();
-	if (mlx5e_is_uplink_rep(priv))
+	if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch)
 		return;
 
 	channel_stats = priv->channel_stats[i];
@@ -5328,6 +5328,9 @@ static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i,
 	struct mlx5e_sq_stats *sq_stats;
 
 	ASSERT_RTNL();
+	if (!priv->stats_nch)
+		return;
+
 	/* no special case needed for ptp htb etc since txq2sq_stats is kept up
 	 * to date for active sq_stats, otherwise get_base_stats takes care of
 	 * inactive sqs.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8790d57dc6db..92094bf60d59 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -360,7 +360,7 @@ mlx5e_rep_set_ringparam(struct net_device *dev,
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	return mlx5e_ethtool_set_ringparam(priv, param);
+	return mlx5e_ethtool_set_ringparam(priv, param, extack);
 }
 
 static void mlx5e_rep_get_channels(struct net_device *dev,
@@ -386,7 +386,7 @@ static int mlx5e_rep_get_coalesce(struct net_device *netdev,
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
-	return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal);
+	return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal, extack);
 }
 
 static int mlx5e_rep_set_coalesce(struct net_device *netdev,
@@ -898,7 +898,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev,
 	netdev->hw_features    |= NETIF_F_RXCSUM;
 
 	netdev->features |= netdev->hw_features;
-	netdev->features |= NETIF_F_NETNS_LOCAL;
+
+	netdev->netns_local = true;
 }
 
 static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 225da8d691fc..8e24ba96c779 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -735,6 +735,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
 	ksm_entries = bitmap_find_window(shampo->bitmap,
 					 shampo->hd_per_wqe,
 					 shampo->hd_per_wq, shampo->pi);
+	ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE);
 	if (!ksm_entries)
 		return 0;
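The one-line en_rx.c change rounds the free KSM window down to a whole page's worth of SHAMPO headers before posting a UMR, so no partial page is ever committed. For power-of-two alignments, ALIGN_DOWN() is just a mask; a runnable sketch (the per-page header count here is illustrative):

#include <stdio.h>

/* power-of-two round-down, same result as the kernel's ALIGN_DOWN() */
#define TOY_ALIGN_DOWN(n, a)	((n) & ~((a) - 1))

int main(void)
{
	unsigned int hdrs_per_page = 8;		/* illustrative, power of two */
	unsigned int n;

	for (n = 0; n <= 20; n += 5)
		printf("%2u -> %2u\n", n, TOY_ALIGN_DOWN(n, hdrs_per_page));
	return 0;	/* prints 0->0, 5->0, 10->8, 15->8, 20->16 */
}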
@@ -962,26 +963,31 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
 	sq->cc = sqcc;
 }
 
-static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
-				       struct mlx5e_icosq *sq)
+void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len)
 {
-	struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
-	struct mlx5e_shampo_hd *shampo;
-	/* assume 1:1 relationship between RQ and icosq */
-	struct mlx5e_rq *rq = &c->rq;
-	int end, from, len = umr.len;
+	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+	int end, from, full_len = len;
 
-	shampo = rq->mpwqe.shampo;
 	end = shampo->hd_per_wq;
 	from = shampo->ci;
-	if (from + len > shampo->hd_per_wq) {
+	if (from + len > end) {
 		len -= end - from;
 		bitmap_set(shampo->bitmap, from, end - from);
 		from = 0;
 	}
 
 	bitmap_set(shampo->bitmap, from, len);
-	shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
+	shampo->ci = (shampo->ci + full_len) & (shampo->hd_per_wq - 1);
+}
+
+static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
+				       struct mlx5e_icosq *sq)
+{
+	struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
+	/* assume 1:1 relationship between RQ and icosq */
+	struct mlx5e_rq *rq = &c->rq;
+
+	mlx5e_shampo_fill_umr(rq, umr.len);
 }
 
 int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
@@ -2340,6 +2346,9 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
 			stats->hds_nodata_packets++;
 			stats->hds_nodata_bytes += head_size;
 		}
+	} else {
+		stats->hds_nosplit_packets++;
+		stats->hds_nosplit_bytes += data_bcnt;
 	}
 
 	mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index e7a3290a708a..611ec4b6f370 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -144,6 +144,8 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_hds_nodata_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_hds_nodata_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_hds_nosplit_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_hds_nosplit_bytes) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
@@ -347,6 +349,8 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
 	s->rx_gro_large_hds          += rq_stats->gro_large_hds;
 	s->rx_hds_nodata_packets     += rq_stats->hds_nodata_packets;
 	s->rx_hds_nodata_bytes       += rq_stats->hds_nodata_bytes;
+	s->rx_hds_nosplit_packets    += rq_stats->hds_nosplit_packets;
+	s->rx_hds_nosplit_bytes      += rq_stats->hds_nosplit_bytes;
 	s->rx_ecn_mark               += rq_stats->ecn_mark;
 	s->rx_removed_vlan_packets   += rq_stats->removed_vlan_packets;
 	s->rx_csum_none              += rq_stats->csum_none;
@@ -2062,6 +2066,8 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, hds_nodata_packets) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, hds_nodata_bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, hds_nosplit_packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, hds_nosplit_bytes) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 4c5858c1dd82..5961c569cfe0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -156,6 +156,8 @@ struct mlx5e_sw_stats {
 	u64 rx_gro_large_hds;
 	u64 rx_hds_nodata_packets;
 	u64 rx_hds_nodata_bytes;
+	u64 rx_hds_nosplit_packets;
+	u64 rx_hds_nosplit_bytes;
 	u64 rx_mcast_packets;
 	u64 rx_ecn_mark;
 	u64 rx_removed_vlan_packets;
@@ -356,6 +358,8 @@ struct mlx5e_rq_stats {
 	u64 gro_large_hds;
 	u64 hds_nodata_packets;
 	u64 hds_nodata_bytes;
+	u64 hds_nosplit_packets;
+	u64 hds_nosplit_bytes;
 	u64 mcast_packets;
 	u64 ecn_mark;
 	u64 removed_vlan_packets;
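mlx5e_shampo_fill_umr() above marks a range of header slots in a circular bitmap: when the range runs past the end of the ring it is split into a tail segment and a wrapped head segment, and the consumer index advances by the full length modulo the power-of-two ring size. The same logic as a runnable sketch:

#include <stdio.h>

static void toy_mark(unsigned char *bm, int from, int len)
{
	while (len--)
		bm[from++] = 1;		/* stands in for bitmap_set() */
}

/* Mark "len" slots starting at "ci" in a ring of "size" slots. */
static int toy_fill_ring(unsigned char *bm, int size, int ci, int len)
{
	int full_len = len;
	int from = ci;

	if (from + len > size) {	/* range wraps past the end */
		len -= size - from;
		toy_mark(bm, from, size - from);
		from = 0;
	}
	toy_mark(bm, from, len);

	return (ci + full_len) & (size - 1);	/* new consumer index */
}

int main(void)
{
	unsigned char bm[8] = { 0 };

	printf("new ci = %d\n", toy_fill_ring(bm, 8, 6, 4)); /* marks 6,7,0,1 -> 2 */
	return 0;
}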
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 30673292e15f..6b3b1afe8312 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1740,10 +1740,118 @@ has_encap_dests(struct mlx5_flow_attr *attr)
 }
 
 static int
+extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
+{
+	bool int_dest = false, ext_dest = false;
+	struct mlx5_esw_flow_attr *esw_attr;
+	int i;
+
+	if (flow->attr != attr ||
+	    !list_is_first(&attr->list, &flow->attrs))
+		return 0;
+
+	if (flow_flag_test(flow, SLOW))
+		return 0;
+
+	esw_attr = attr->esw_attr;
+	if (!esw_attr->split_count ||
+	    esw_attr->split_count == esw_attr->out_count - 1)
+		return 0;
+
+	if (esw_attr->dest_int_port &&
+	    (esw_attr->dests[esw_attr->split_count].flags &
+	     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+		return esw_attr->split_count + 1;
+
+	for (i = esw_attr->split_count; i < esw_attr->out_count; i++) {
+		/* external dest with encap is considered as internal by firmware */
+		if (esw_attr->dests[i].vport == MLX5_VPORT_UPLINK &&
+		    !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID))
+			ext_dest = true;
+		else
+			int_dest = true;
+
+		if (ext_dest && int_dest)
+			return esw_attr->split_count;
+	}
+
+	return 0;
+}
+
+static int
+extra_split_attr_dests(struct mlx5e_tc_flow *flow,
+		       struct mlx5_flow_attr *attr, int split_count)
+{
+	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
+	struct mlx5e_tc_flow_parse_attr *parse_attr, *parse_attr2;
+	struct mlx5_esw_flow_attr *esw_attr, *esw_attr2;
+	struct mlx5e_post_act_handle *handle;
+	struct mlx5_flow_attr *attr2;
+	int i, j, err;
+
+	if (IS_ERR(post_act))
+		return PTR_ERR(post_act);
+
+	attr2 = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
+	parse_attr2 = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
+	if (!attr2 || !parse_attr2) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+	attr2->parse_attr = parse_attr2;
+
+	handle = mlx5e_tc_post_act_add(post_act, attr2);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto err_free;
+	}
+
+	esw_attr = attr->esw_attr;
+	esw_attr2 = attr2->esw_attr;
+	esw_attr2->in_rep = esw_attr->in_rep;
+
+	parse_attr = attr->parse_attr;
+	parse_attr2->filter_dev = parse_attr->filter_dev;
+
+	for (i = split_count, j = 0; i < esw_attr->out_count; i++, j++)
+		esw_attr2->dests[j] = esw_attr->dests[i];
+
+	esw_attr2->out_count = j;
+	attr2->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+	err = mlx5e_tc_post_act_offload(post_act, handle);
+	if (err)
+		goto err_post_act_offload;
+
+	err = mlx5e_tc_post_act_set_handle(flow->priv->mdev, handle,
+					   &parse_attr->mod_hdr_acts);
+	if (err)
+		goto err_post_act_set_handle;
+
+	esw_attr->out_count = split_count;
+	attr->extra_split_ft = mlx5e_tc_post_act_get_ft(post_act);
+	flow->extra_split_attr = attr2;
+
+	attr2->post_act_handle = handle;
+
+	return 0;
+
+err_post_act_set_handle:
+	mlx5e_tc_post_act_unoffload(post_act, handle);
+err_post_act_offload:
+	mlx5e_tc_post_act_del(post_act, handle);
+err_free:
+	kvfree(parse_attr2);
+	kfree(attr2);
+	return err;
+}
+
+static int
 post_process_attr(struct mlx5e_tc_flow *flow,
 		  struct mlx5_flow_attr *attr,
 		  struct netlink_ext_ack *extack)
 {
+	int extra_split;
 	bool vf_tun;
 	int err = 0;
 
@@ -1757,6 +1865,13 @@ post_process_attr(struct mlx5e_tc_flow *flow,
 		goto err_out;
 	}
 
+	extra_split = extra_split_attr_dests_needed(flow, attr);
+	if (extra_split > 0) {
+		err = extra_split_attr_dests(flow, attr, extra_split);
+		if (err)
+			goto err_out;
+	}
+
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
 		err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
 		if (err)
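extra_split_attr_dests() above is a textbook kernel goto ladder: each acquired resource adds one unwind label, and any failure jumps to the label that releases everything taken so far, in reverse order. The skeleton of that pattern, with every function hypothetical:

static int toy_setup(void)
{
	int err;

	err = acquire_a();	/* hypothetical */
	if (err)
		return err;

	err = acquire_b();	/* hypothetical */
	if (err)
		goto err_release_a;

	err = acquire_c();	/* hypothetical */
	if (err)
		goto err_release_b;

	return 0;

err_release_b:
	release_b();
err_release_a:
	release_a();
	return err;
}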
@@ -1971,6 +2086,11 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 	mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);
 	free_flow_post_acts(flow);
+	if (flow->extra_split_attr) {
+		mlx5_free_flow_attr_actions(flow, flow->extra_split_attr);
+		kvfree(flow->extra_split_attr->parse_attr);
+		kfree(flow->extra_split_attr);
+	}
 	mlx5_free_flow_attr_actions(flow, attr);
 
 	kvfree(attr->esw_attr->rx_tun_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index c24bda56b2b5..e1b8cb78369f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -86,6 +86,7 @@ struct mlx5_flow_attr {
 	u32 dest_chain;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_table *dest_ft;
+	struct mlx5_flow_table *extra_split_ft;
 	u8 inner_match_level;
 	u8 outer_match_level;
 	u8 tun_ip_version;
@@ -139,7 +140,7 @@ struct mlx5_rx_tun_attr {
 #define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16
 #define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0)
 
-#define MLX5E_TC_MAX_INT_PORT_NUM (8)
+#define MLX5E_TC_MAX_INT_PORT_NUM (32)
 
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index cb7e7e4104af..2505f90c0b39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -835,28 +835,9 @@ static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx)
 	mlx5_irq_release_vector(irq);
 }
 
-static int mlx5_cpumask_default_spread(int numa_node, int index)
+static int mlx5_cpumask_default_spread(struct mlx5_core_dev *dev, int index)
 {
-	const struct cpumask *prev = cpu_none_mask;
-	const struct cpumask *mask;
-	int found_cpu = 0;
-	int i = 0;
-	int cpu;
-
-	rcu_read_lock();
-	for_each_numa_hop_mask(mask, numa_node) {
-		for_each_cpu_andnot(cpu, mask, prev) {
-			if (i++ == index) {
-				found_cpu = cpu;
-				goto spread_done;
-			}
-		}
-		prev = mask;
-	}
-
-spread_done:
-	rcu_read_unlock();
-	return found_cpu;
+	return cpumask_local_spread(index, dev->priv.numa_node);
 }
 
 static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev)
@@ -880,7 +861,7 @@ static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx)
 	int cpu;
 
 	rmap = mlx5_eq_table_get_pci_rmap(dev);
-	cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx);
+	cpu = mlx5_cpumask_default_spread(dev, vecidx);
 	irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap);
 	if (IS_ERR(irq))
 		return PTR_ERR(irq);
@@ -915,7 +896,7 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
 	if (!mlx5_irq_pool_is_sf_pool(pool))
 		return comp_irq_request_pci(dev, vecidx);
 
-	af_desc.is_managed = 1;
+	af_desc.is_managed = false;
 	cpumask_copy(&af_desc.mask, cpu_online_mask);
 	cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
 	irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
@@ -1145,7 +1126,7 @@ int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector)
 	if (mask)
 		cpu = cpumask_first(mask);
 	else
-		cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector);
+		cpu = mlx5_cpumask_default_spread(dev, vector);
 
 	return cpu;
 }
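The eq.c hunk replaces a hand-rolled walk over NUMA hop masks with cpumask_local_spread(), which already returns the i-th online CPU in NUMA-locality order and wraps modulo the number of CPUs, so any index is safe. A minimal sketch of the call (the kernel API is real; the wrapper is illustrative):

#include <linux/cpumask.h>

/* Pick the CPU for completion vector "index", preferring CPUs on or
 * near the device's NUMA node. */
static int toy_pick_cpu(int numa_node, int index)
{
	return cpumask_local_spread(index, numa_node);
}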
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 255bc8b749f9..8587cd572da5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -319,7 +319,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
 		return -EPERM;
 
 	mutex_lock(&esw->state_lock);
-	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+	if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw)) {
 		err = -EOPNOTSUPP;
 		goto out;
 	}
@@ -339,7 +339,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
 	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 
-	if (esw->mode != MLX5_ESWITCH_LEGACY)
+	if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw))
 		return -EOPNOTSUPP;
 
 	*setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 20146a2dc7f4..02a3563f51ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -312,6 +312,25 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
 	return err;
 }
 
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+	switch (type) {
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_TSAR;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+	}
+	return false;
+}
+
 static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
 					      struct mlx5_vport *vport,
 					      u32 max_rate, u32 bw_share)
@@ -323,6 +342,9 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
 	void *vport_elem;
 	int err;
 
+	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
+		return -EOPNOTSUPP;
+
 	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
 	MLX5_SET(scheduling_context, sched_ctx, element_type,
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
@@ -421,6 +443,7 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
 {
 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
 	struct mlx5_esw_rate_group *group;
+	__be32 *attr;
 	u32 divider;
 	int err;
 
@@ -428,6 +451,12 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
 	if (!group)
 		return ERR_PTR(-ENOMEM);
 
+	MLX5_SET(scheduling_context, tsar_ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
 	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
 		 esw->qos.root_tsar_ix);
 	err = mlx5_create_scheduling_element_cmd(esw->dev,
@@ -526,25 +555,6 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
 	return err;
 }
 
-static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
-{
-	switch (type) {
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_TSAR;
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_VPORT;
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
-	}
-	return false;
-}
-
 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
 {
 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
@@ -555,7 +565,8 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
 		return -EOPNOTSUPP;
 
-	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
+	    !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
 		return -EOPNOTSUPP;
 
 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 578466d69f21..f44b4c7ebcfd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -887,9 +887,6 @@ int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_v
 					 bool enable);
 int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, u16 vport_num);
-void mlx5_esw_vport_ipsec_offload_enable(struct mlx5_eswitch *esw);
-void mlx5_esw_vport_ipsec_offload_disable(struct mlx5_eswitch *esw);
-
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
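The DWRR hunk above writes the TSAR type into the scheduling context's element_attributes as a big-endian word whose upper 16 bits carry the type, so the bytes land the same way on the device regardless of host endianness. A runnable userspace sketch of that packing (the constant's value is illustrative, not taken from the mlx5 interface files):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>		/* htonl() stands in for cpu_to_be32() */

#define TOY_TSAR_TYPE_DWRR	0x1	/* illustrative value */

int main(void)
{
	uint32_t attr = htonl(TOY_TSAR_TYPE_DWRR << 16);
	uint8_t *p = (uint8_t *)&attr;

	/* the type occupies the first two bytes in memory / on the wire */
	printf("%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]);
	return 0;
}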
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 768199d2255a..f24f91d213f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -613,6 +613,13 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
 		}
 	}
 
+	if (attr->extra_split_ft) {
+		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+		dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		dest[*i].ft = attr->extra_split_ft;
+		(*i)++;
+	}
+
 out:
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 9b8599c200e2..676005854dad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -463,7 +463,7 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
 	int num_encap = 0;
 
 	*extended_dest = false;
-	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+	if (!(fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return 0;
 
 	list_for_each_entry(dst, &fte->node.children, node.list) {
@@ -502,17 +502,17 @@ mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context)
 				 execute_aso[0]);
 	MLX5_SET(execute_aso, execute_aso, valid, 1);
 	MLX5_SET(execute_aso, execute_aso, aso_object_id,
-		 fte->action.exe_aso.object_id);
+		 fte->act_dests.action.exe_aso.object_id);
 
 	exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl);
 	MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id,
-		 fte->action.exe_aso.return_reg_id);
+		 fte->act_dests.action.exe_aso.return_reg_id);
 	MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type,
-		 fte->action.exe_aso.type);
+		 fte->act_dests.action.exe_aso.type);
 	MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color,
-		 fte->action.exe_aso.flow_meter.init_color);
+		 fte->act_dests.action.exe_aso.flow_meter.init_color);
 	MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id,
-		 fte->action.exe_aso.flow_meter.meter_idx);
+		 fte->act_dests.action.exe_aso.flow_meter.meter_idx);
 }
 
 static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
@@ -541,7 +541,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 	else
 		dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
 
-	inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
+	inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->act_dests.dests_size * dst_cnt_size;
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -553,7 +553,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 	MLX5_SET(set_fte_in, in, table_id,   ft->id);
 	MLX5_SET(set_fte_in, in, flow_index, fte->index);
 	MLX5_SET(set_fte_in, in, ignore_flow_level,
-		 !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL));
+		 !!(fte->act_dests.action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL));
 
 	MLX5_SET(set_fte_in, in, vport_number, ft->vport);
 	MLX5_SET(set_fte_in, in, other_vport,
@@ -563,23 +563,23 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
 	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
 	MLX5_SET(flow_context, in_flow_context, flow_tag,
-		 fte->flow_context.flow_tag);
+		 fte->act_dests.flow_context.flow_tag);
 	MLX5_SET(flow_context, in_flow_context, flow_source,
-		 fte->flow_context.flow_source);
+		 fte->act_dests.flow_context.flow_source);
 	MLX5_SET(flow_context, in_flow_context, uplink_hairpin_en,
-		 !!(fte->flow_context.flags & FLOW_CONTEXT_UPLINK_HAIRPIN_EN));
+		 !!(fte->act_dests.flow_context.flags & FLOW_CONTEXT_UPLINK_HAIRPIN_EN));
 
 	MLX5_SET(flow_context, in_flow_context, extended_destination,
 		 extended_dest);
 
-	action = fte->action.action;
+	action = fte->act_dests.action.action;
 	if (extended_dest)
 		action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 	MLX5_SET(flow_context, in_flow_context, action, action);
 
-	if (!extended_dest && fte->action.pkt_reformat) {
-		struct mlx5_pkt_reformat *pkt_reformat = fte->action.pkt_reformat;
+	if (!extended_dest && fte->act_dests.action.pkt_reformat) {
+		struct mlx5_pkt_reformat *pkt_reformat = fte->act_dests.action.pkt_reformat;
 
 		if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) {
 			reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat);
@@ -591,46 +591,46 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 				goto err_out;
 			}
 		} else {
-			reformat_id = fte->action.pkt_reformat->id;
+			reformat_id = fte->act_dests.action.pkt_reformat->id;
 		}
 	}
 
 	MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
 		 (u32)reformat_id);
 
-	if (fte->action.modify_hdr) {
-		if (fte->action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) {
+	if (fte->act_dests.action.modify_hdr) {
+		if (fte->act_dests.action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) {
 			mlx5_core_err(dev, "Can't use SW-owned modify_hdr in FW-owned table\n");
 			err = -EOPNOTSUPP;
 			goto err_out;
 		}
 
 		MLX5_SET(flow_context, in_flow_context, modify_header_id,
-			 fte->action.modify_hdr->id);
+			 fte->act_dests.action.modify_hdr->id);
 	}
 
 	MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type,
-		 fte->action.crypto.type);
+		 fte->act_dests.action.crypto.type);
 	MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_obj_id,
-		 fte->action.crypto.obj_id);
+		 fte->act_dests.action.crypto.obj_id);
 
 	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
 
-	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
-	MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
-	MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+	MLX5_SET(vlan, vlan, ethtype, fte->act_dests.action.vlan[0].ethtype);
+	MLX5_SET(vlan, vlan, vid, fte->act_dests.action.vlan[0].vid);
+	MLX5_SET(vlan, vlan, prio, fte->act_dests.action.vlan[0].prio);
 
 	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
 
-	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
-	MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
-	MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
+	MLX5_SET(vlan, vlan, ethtype, fte->act_dests.action.vlan[1].ethtype);
+	MLX5_SET(vlan, vlan, vid, fte->act_dests.action.vlan[1].vid);
+	MLX5_SET(vlan, vlan, prio, fte->act_dests.action.vlan[1].prio);
 
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
 	memcpy(in_match_value, &fte->val, sizeof(fte->val));
 
 	in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
-	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+	if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		int list_size = 0;
 
 		list_for_each_entry(dst, &fte->node.children, node.list) {
@@ -706,7 +706,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 			 list_size);
 	}
 
-	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+	if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
 					log_max_flow_counter,
 					ft->type));
@@ -731,8 +731,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 			 list_size);
 	}
 
-	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
-		if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) {
+	if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+		if (fte->act_dests.action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) {
 			mlx5_cmd_set_fte_flow_meter(fte, in_flow_context);
 		} else {
 			err = -EOPNOTSUPP;
@@ -1071,7 +1071,7 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
 static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns,
 				     enum fs_flow_table_type ft_type)
 {
-	return 0;
+	return MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH;
 }
 
 static const struct mlx5_flow_cmds mlx5_flow_cmds = {
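mlx5_cmd_get_capabilities() now advertises MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH for the FW command backend, and try_add_to_existing_fg() (further below) keys its NO-APPEND handling off that bit. The general backend-capability pattern in miniature, with all names invented:

enum toy_caps {
	TOY_CAP_MATCH_RANGES	= 1u << 0,
	TOY_CAP_DUP_MATCH	= 1u << 1,
};

static unsigned int toy_backend_caps(void)
{
	return TOY_CAP_DUP_MATCH;	/* this backend tolerates duplicates */
}

static bool toy_may_park_duplicate(void)
{
	return toy_backend_caps() & TOY_CAP_DUP_MATCH;
}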
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 53e0e5137d3f..7eb7b3ffe3d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -124,4 +124,12 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
 int mlx5_fs_cmd_set_l2table_entry_silent(struct mlx5_core_dev *dev, u8 silent_mode);
 int mlx5_fs_cmd_set_tx_flow_table_root(struct mlx5_core_dev *dev, u32 ft_id, bool disconnect);
+
+static inline bool mlx5_fs_cmd_is_fw_term_table(struct mlx5_flow_table *ft)
+{
+	if (ft->flags & MLX5_FLOW_TABLE_TERMINATION)
+		return true;
+
+	return false;
+}
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index a47d6419160d..8505d5e241e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -605,12 +605,37 @@ static void modify_fte(struct fs_fte *fte)
 	dev = get_dev(&fte->node);
 
 	root = find_root(&ft->node);
-	err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte);
+	err = root->cmds->update_fte(root, ft, fg, fte->act_dests.modify_mask, fte);
 	if (err)
 		mlx5_core_warn(dev,
 			       "%s can't del rule fg id=%d fte_index=%d\n",
 			       __func__, fg->id, fte->index);
-	fte->modify_mask = 0;
+	fte->act_dests.modify_mask = 0;
+}
+
+static void del_sw_hw_dup_rule(struct fs_node *node)
+{
+	struct mlx5_flow_rule *rule;
+	struct fs_fte *fte;
+
+	fs_get_obj(rule, node);
+	fs_get_obj(fte, rule->node.parent);
+	trace_mlx5_fs_del_rule(rule);
+
+	if (is_fwd_next_action(rule->sw_action)) {
+		mutex_lock(&rule->dest_attr.ft->lock);
+		list_del(&rule->next_ft);
+		mutex_unlock(&rule->dest_attr.ft->lock);
+	}
+
+	/* If a pending rule is being deleted it means
+	 * this is a NO APPEND rule, so there are no partial deletions,
+	 * all the rules of the mlx5_flow_handle are going to be deleted
+	 * and the rules aren't shared with any other mlx5_flow_handle instance
+	 * so no need to do any bookkeeping like in del_sw_hw_rule().
+	 */
+
+	kfree(rule);
 }
 
 static void del_sw_hw_rule(struct fs_node *node)
@@ -628,29 +653,29 @@ static void del_sw_hw_rule(struct fs_node *node)
 	}
 
 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) {
-		--fte->dests_size;
-		fte->modify_mask |=
+		--fte->act_dests.dests_size;
+		fte->act_dests.modify_mask |=
 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
-		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		fte->act_dests.action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
 		goto out;
 	}
 
 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
-		--fte->dests_size;
-		fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
-		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+		--fte->act_dests.dests_size;
+		fte->act_dests.modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+		fte->act_dests.action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 		goto out;
 	}
 
 	if (is_fwd_dest_type(rule->dest_attr.type)) {
-		--fte->dests_size;
-		--fte->fwd_dests;
+		--fte->act_dests.dests_size;
+		--fte->act_dests.fwd_dests;
 
-		if (!fte->fwd_dests)
-			fte->action.action &=
+		if (!fte->act_dests.fwd_dests)
+			fte->act_dests.action.action &=
 				~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-		fte->modify_mask |=
+		fte->act_dests.modify_mask |=
 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
 		goto out;
 	}
@@ -658,12 +683,33 @@ out:
 	kfree(rule);
 }
 
+static void switch_to_pending_act_dests(struct fs_fte *fte)
+{
+	struct fs_node *iter;
+
+	memcpy(&fte->act_dests, &fte->dup->act_dests, sizeof(fte->act_dests));
+
+	list_bulk_move_tail(&fte->node.children,
+			    fte->dup->children.next,
+			    fte->dup->children.prev);
+
+	list_for_each_entry(iter, &fte->node.children, list)
+		iter->del_sw_func = del_sw_hw_rule;
+
+	/* Make sure the fte isn't deleted
+	 * as mlx5_del_flow_rules() decreases the refcount
+	 * of the fte to trigger deletion.
+	 */
+	tree_get_node(&fte->node);
+}
+
 static void del_hw_fte(struct fs_node *node)
 {
 	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
 	struct mlx5_core_dev *dev;
+	bool pending_used = false;
 	struct fs_fte *fte;
 	int err;
 
@@ -672,16 +718,35 @@ static void del_hw_fte(struct fs_node *node)
 	fs_get_obj(ft, fg->node.parent);
 
 	trace_mlx5_fs_del_fte(fte);
-	WARN_ON(fte->dests_size);
+	WARN_ON(fte->act_dests.dests_size);
 	dev = get_dev(&ft->node);
 	root = find_root(&ft->node);
+
+	if (fte->dup && !list_empty(&fte->dup->children)) {
+		switch_to_pending_act_dests(fte);
+		pending_used = true;
+	} else {
+		/* Avoid double call to del_hw_fte */
+		node->del_hw_func = NULL;
+	}
+
 	if (node->active) {
-		err = root->cmds->delete_fte(root, ft, fte);
-		if (err)
-			mlx5_core_warn(dev,
-				       "flow steering can't delete fte in index %d of flow group id %d\n",
-				       fte->index, fg->id);
-		node->active = false;
+		if (pending_used) {
+			err = root->cmds->update_fte(root, ft, fg,
+						     fte->act_dests.modify_mask, fte);
+			if (err)
+				mlx5_core_warn(dev,
+					       "flow steering can't update to pending rule in index %d of flow group id %d\n",
+					       fte->index, fg->id);
+			fte->act_dests.modify_mask = 0;
+		} else {
+			err = root->cmds->delete_fte(root, ft, fte);
+			if (err)
+				mlx5_core_warn(dev,
+					       "flow steering can't delete fte in index %d of flow group id %d\n",
+					       fte->index, fg->id);
+			node->active = false;
+		}
 	}
 }
 
@@ -700,6 +765,7 @@ static void del_sw_fte(struct fs_node *node)
 			     rhash_fte);
 	WARN_ON(err);
 	ida_free(&fg->fte_allocator, fte->index - fg->start_index);
+	kvfree(fte->dup);
 	kmem_cache_free(steering->ftes_cache, fte);
 }
 
@@ -782,8 +848,8 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
 	memcpy(fte->val, &spec->match_value, sizeof(fte->val));
 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
-	fte->action = *flow_act;
-	fte->flow_context = spec->flow_context;
+	fte->act_dests.action = *flow_act;
+	fte->act_dests.flow_context = spec->flow_context;
 
 	tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
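del_hw_fte() now has two exits: when a NO-APPEND duplicate is parked on fte->dup, the hardware entry is updated in place to the pending actions instead of being deleted, and switch_to_pending_act_dests() migrates the pending rules with a single list_bulk_move_tail() splice. That list operation in isolation — the kernel API is real, the wrapper is a sketch that assumes a non-empty source list:

#include <linux/list.h>

/* Move every node on "pending" to the tail of "active" in one splice,
 * as switch_to_pending_act_dests() does with fte->dup->children. */
static void toy_promote_pending(struct list_head *active,
				struct list_head *pending)
{
	list_bulk_move_tail(active, pending->next, pending->prev);
}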
@@ -1103,18 +1169,45 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	return err;
 }
 
+static bool rule_is_pending(struct fs_fte *fte, struct mlx5_flow_rule *rule)
+{
+	struct mlx5_flow_rule *tmp_rule;
+	struct fs_node *iter;
+
+	if (!fte->dup || list_empty(&fte->dup->children))
+		return false;
+
+	list_for_each_entry(iter, &fte->dup->children, list) {
+		tmp_rule = container_of(iter, struct mlx5_flow_rule, node);
+
+		if (tmp_rule == rule)
+			return true;
+	}
+
+	return false;
+}
+
 static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 					 struct mlx5_flow_destination *dest)
 {
 	struct mlx5_flow_root_namespace *root;
+	struct fs_fte_action *act_dests;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
+	bool pending = false;
 	struct fs_fte *fte;
 	int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
 	int err = 0;
 
 	fs_get_obj(fte, rule->node.parent);
-	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+
+	pending = rule_is_pending(fte, rule);
+	if (pending)
+		act_dests = &fte->dup->act_dests;
+	else
+		act_dests = &fte->act_dests;
+
+	if (!(act_dests->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return -EINVAL;
 	down_write_ref_node(&fte->node, false);
 	fs_get_obj(fg, fte->node.parent);
@@ -1122,8 +1215,9 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 	memcpy(&rule->dest_attr, dest, sizeof(*dest));
 	root = find_root(&ft->node);
-	err = root->cmds->update_fte(root, ft, fg,
-				     modify_mask, fte);
+	if (!pending)
+		err = root->cmds->update_fte(root, ft, fg,
+					     modify_mask, fte);
 	up_write_ref_node(&fte->node, false);
 
 	return err;
@@ -1453,6 +1547,16 @@ static struct mlx5_flow_handle *alloc_handle(int num_rules)
 	return handle;
 }
 
+static void destroy_flow_handle_dup(struct mlx5_flow_handle *handle,
+				    int i)
+{
+	for (; --i >= 0;) {
+		list_del(&handle->rule[i]->node.list);
+		kfree(handle->rule[i]);
+	}
+	kfree(handle);
+}
+
 static void destroy_flow_handle(struct fs_fte *fte,
 				struct mlx5_flow_handle *handle,
 				struct mlx5_flow_destination *dest,
@@ -1460,7 +1564,7 @@ static void destroy_flow_handle(struct fs_fte *fte,
 {
 	for (; --i >= 0;) {
 		if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
-			fte->dests_size--;
+			fte->act_dests.dests_size--;
 			list_del(&handle->rule[i]->node.list);
 			kfree(handle->rule[i]);
 		}
@@ -1469,6 +1573,61 @@ static void destroy_flow_handle(struct fs_fte *fte,
 }
 
 static struct mlx5_flow_handle *
+create_flow_handle_dup(struct list_head *children,
+		       struct mlx5_flow_destination *dest,
+		       int dest_num,
+		       struct fs_fte_action *act_dests)
+{
+	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+	struct mlx5_flow_rule *rule = NULL;
+	struct mlx5_flow_handle *handle;
+	int i = 0;
+	int type;
+
+	handle = alloc_handle((dest_num) ? dest_num : 1);
+	if (!handle)
+		return NULL;
+
+	do {
+		rule = alloc_rule(dest + i);
+		if (!rule)
+			goto free_rules;
+
+		/* Add dest to dests list- we need flow tables to be in the
+		 * end of the list for forward to next prio rules.
+		 */
+		tree_init_node(&rule->node, NULL, del_sw_hw_dup_rule);
+		if (dest &&
+		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+			list_add(&rule->node.list, children);
+		else
+			list_add_tail(&rule->node.list, children);
+
+		if (dest) {
+			act_dests->dests_size++;
+
+			if (is_fwd_dest_type(dest[i].type))
+				act_dests->fwd_dests++;
+
+			type = dest[i].type ==
+			       MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+			act_dests->modify_mask |= type ? count : dst;
+		}
+		handle->rule[i] = rule;
+	} while (++i < dest_num);
+
+	return handle;
+
+free_rules:
+	destroy_flow_handle_dup(handle, i);
+	act_dests->dests_size = 0;
+	act_dests->fwd_dests = 0;
+
+	return NULL;
+}
+
+static struct mlx5_flow_handle *
 create_flow_handle(struct fs_fte *fte,
 		   struct mlx5_flow_destination *dest,
 		   int dest_num,
@@ -1510,10 +1669,10 @@ create_flow_handle(struct fs_fte *fte,
 		else
 			list_add_tail(&rule->node.list, &fte->node.children);
 		if (dest) {
-			fte->dests_size++;
+			fte->act_dests.dests_size++;
 
 			if (is_fwd_dest_type(dest[i].type))
-				fte->fwd_dests++;
+				fte->act_dests.fwd_dests++;
 
 			type = dest[i].type ==
 				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
@@ -1774,17 +1933,17 @@ static int check_conflicting_ftes(struct fs_fte *fte,
 				  const struct mlx5_flow_context *flow_context,
 				  const struct mlx5_flow_act *flow_act)
 {
-	if (check_conflicting_actions(flow_act, &fte->action)) {
+	if (check_conflicting_actions(flow_act, &fte->act_dests.action)) {
 		mlx5_core_warn(get_dev(&fte->node),
 			       "Found two FTEs with conflicting actions\n");
 		return -EEXIST;
 	}
 
 	if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
-	    fte->flow_context.flow_tag != flow_context->flow_tag) {
+	    fte->act_dests.flow_context.flow_tag != flow_context->flow_tag) {
 		mlx5_core_warn(get_dev(&fte->node),
 			       "FTE flow tag %u already exists with different flow tag %u\n",
-			       fte->flow_context.flow_tag,
+			       fte->act_dests.flow_context.flow_tag,
 			       flow_context->flow_tag);
 		return -EEXIST;
 	}
@@ -1808,12 +1967,12 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 	if (ret)
 		return ERR_PTR(ret);
 
-	old_action = fte->action.action;
-	fte->action.action |= flow_act->action;
+	old_action = fte->act_dests.action.action;
+	fte->act_dests.action.action |= flow_act->action;
 	handle = add_rule_fte(fte, fg, dest, dest_num,
 			      old_action != flow_act->action);
 	if (IS_ERR(handle)) {
-		fte->action.action = old_action;
+		fte->act_dests.action.action = old_action;
 		return handle;
 	}
 	trace_mlx5_fs_set_fte(fte, false);
@@ -1961,6 +2120,62 @@ out:
 	return fte_tmp;
 }
 
+/* Native capability lacks support for adding an additional match with the same value
+ * to the same flow group. To accommodate the NO APPEND flag in these scenarios,
+ * we include the new rule in the existing flow table entry (fte) without immediate
+ * hardware commitment. When a request is made to delete the corresponding hardware rule,
+ * we then commit the pending rule to hardware.
+ */
+static struct mlx5_flow_handle *
+add_rule_dup_match_fte(struct fs_fte *fte,
+		       const struct mlx5_flow_spec *spec,
+		       struct mlx5_flow_act *flow_act,
+		       struct mlx5_flow_destination *dest,
+		       int dest_num)
+{
+	struct mlx5_flow_handle *handle;
+	struct fs_fte_dup *dup;
+	int i = 0;
+
+	if (!fte->dup) {
+		dup = kvzalloc(sizeof(*dup), GFP_KERNEL);
+		if (!dup)
+			return ERR_PTR(-ENOMEM);
+		/* dup will be freed when the fte is freed
+		 * this way we don't allocate / free dup on every rule deletion
+		 * or creation
+		 */
+		INIT_LIST_HEAD(&dup->children);
+		fte->dup = dup;
+	}
+
+	if (!list_empty(&fte->dup->children)) {
+		mlx5_core_warn(get_dev(&fte->node),
+			       "Can have only a single duplicate rule\n");
+
+		return ERR_PTR(-EEXIST);
+	}
+
+	fte->dup->act_dests.action = *flow_act;
+	fte->dup->act_dests.flow_context = spec->flow_context;
+	fte->dup->act_dests.dests_size = 0;
+	fte->dup->act_dests.fwd_dests = 0;
+	fte->dup->act_dests.modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+
+	handle = create_flow_handle_dup(&fte->dup->children,
+					dest, dest_num,
+					&fte->dup->act_dests);
+	if (!handle)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < handle->num_rules; i++) {
+		tree_add_node(&handle->rule[i]->node, &fte->node);
+		trace_mlx5_fs_add_rule(handle->rule[i]);
+	}
+
+	return handle;
+}
+
 static struct mlx5_flow_handle *
 try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		       struct list_head *match_head,
@@ -1971,6 +2186,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		       int ft_version)
 {
 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
+	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	struct mlx5_flow_group *g;
 	struct mlx5_flow_handle *rule;
 	struct match_list *iter;
@@ -1984,7 +2200,9 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		return ERR_PTR(-ENOMEM);
 
 search_again_locked:
-	if (flow_act->flags & FLOW_ACT_NO_APPEND)
+	if (flow_act->flags & FLOW_ACT_NO_APPEND &&
+	    (root->cmds->get_capabilities(root, root->table_type) &
+	     MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH))
 		goto skip_search;
 	version = matched_fgs_get_version(match_head);
 	/* Try to find an fte with identical match value and attempt update its
@@ -1997,7 +2215,10 @@ search_again_locked:
 		fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
 		if (!fte_tmp)
 			continue;
-		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
+		if (flow_act->flags & FLOW_ACT_NO_APPEND)
+			rule = add_rule_dup_match_fte(fte_tmp, spec, flow_act, dest, dest_num);
+		else
+			rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
 		/* No error check needed here, because insert_fte() is not called */
 		up_write_ref_node(&fte_tmp->node, false);
 		tree_put_node(&fte_tmp->node, false);
@@ -2265,12 +2486,10 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
 		tree_remove_node(&handle->rule[i]->node, true);
 	if (list_empty(&fte->node.children)) {
 		fte->node.del_hw_func(&fte->node);
-		/* Avoid double call to del_hw_fte */
-		fte->node.del_hw_func = NULL;
 		up_write_ref_node(&fte->node, false);
 		tree_put_node(&fte->node, false);
-	} else if (fte->dests_size) {
-		if (fte->modify_mask)
+	} else if (fte->act_dests.dests_size) {
+		if (fte->act_dests.modify_mask)
 			modify_fte(fte);
 		up_write_ref_node(&fte->node, false);
 	} else {
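From a user of the flow steering API, the new machinery is invisible: a second mlx5_add_flow_rules() call with FLOW_ACT_NO_APPEND and an identical match now succeeds on the FW command path, with the duplicate parked until the original rule is deleted. Hedged usage sketch — the mlx5_add_flow_rules() signature is the real one; spec/dest setup and error handling are assumed done elsewhere:

static struct mlx5_flow_handle *
toy_add_no_append(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec,
		  struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_act flow_act = {
		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
		.flags	= FLOW_ACT_NO_APPEND,
	};

	/* May now be called twice with an identical match in "spec". */
	return mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
}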
@@ -3590,8 +3809,8 @@ out:
 }
 EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
 
-static struct mlx5_flow_root_namespace
-*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
+struct mlx5_flow_root_namespace *
+mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
 {
 	struct mlx5_flow_namespace *ns;
 
@@ -3614,7 +3833,7 @@ struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 	struct mlx5_modify_hdr *modify_hdr;
 	int err;
 
-	root = get_root_namespace(dev, ns_type);
+	root = mlx5_get_root_namespace(dev, ns_type);
 	if (!root)
 		return ERR_PTR(-EOPNOTSUPP);
 
@@ -3639,7 +3858,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
 {
 	struct mlx5_flow_root_namespace *root;
 
-	root = get_root_namespace(dev, modify_hdr->ns_type);
+	root = mlx5_get_root_namespace(dev, modify_hdr->ns_type);
 	if (WARN_ON(!root))
 		return;
 	root->cmds->modify_header_dealloc(root, modify_hdr);
@@ -3655,7 +3874,7 @@ struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
 	struct mlx5_flow_root_namespace *root;
 	int err;
 
-	root = get_root_namespace(dev, ns_type);
+	root = mlx5_get_root_namespace(dev, ns_type);
 	if (!root)
 		return ERR_PTR(-EOPNOTSUPP);
 
@@ -3681,7 +3900,7 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
 {
 	struct mlx5_flow_root_namespace *root;
 
-	root = get_root_namespace(dev, pkt_reformat->ns_type);
+	root = mlx5_get_root_namespace(dev, pkt_reformat->ns_type);
 	if (WARN_ON(!root))
 		return;
 	root->cmds->packet_reformat_dealloc(root, pkt_reformat);
@@ -3703,7 +3922,7 @@ mlx5_create_match_definer(struct mlx5_core_dev *dev,
 	struct mlx5_flow_definer *definer;
 	int id;
 
-	root = get_root_namespace(dev, ns_type);
+	root = mlx5_get_root_namespace(dev, ns_type);
 	if (!root)
 		return ERR_PTR(-EOPNOTSUPP);
 
@@ -3727,7 +3946,7 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
 {
 	struct mlx5_flow_root_namespace *root;
 
-	root = get_root_namespace(dev, definer->ns_type);
+	root = mlx5_get_root_namespace(dev, definer->ns_type);
 	if (WARN_ON(!root))
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 78eb6b7097e1..964937f17cf5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -110,7 +110,9 @@ enum fs_flow_table_type {
 	FS_FT_RDMA_RX = 0X7,
 	FS_FT_RDMA_TX = 0X8,
 	FS_FT_PORT_SEL = 0X9,
-	FS_FT_MAX_TYPE = FS_FT_PORT_SEL,
+	FS_FT_FDB_RX = 0xa,
+	FS_FT_FDB_TX = 0xb,
+	FS_FT_MAX_TYPE = FS_FT_FDB_TX,
 };
 
 enum fs_flow_table_op_mod {
@@ -131,6 +133,7 @@ enum mlx5_flow_steering_capabilty {
 	MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
 	MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
 	MLX5_FLOW_STEERING_CAP_MATCH_RANGES = 1UL << 2,
+	MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH = 1UL << 3,
 };
 
 struct mlx5_flow_steering {
@@ -228,20 +231,29 @@ struct mlx5_ft_underlay_qp {
 		       MLX5_BYTE_OFF(fte_match_param, \
 				     MLX5_FTE_MATCH_PARAM_RESERVED)))
 
+struct fs_fte_action {
+	int				modify_mask;
+	u32				dests_size;
+	u32				fwd_dests;
+	struct mlx5_flow_context	flow_context;
+	struct mlx5_flow_act		action;
+};
+
+struct fs_fte_dup {
+	struct list_head children;
+	struct fs_fte_action act_dests;
+};
+
 /* Type of children is mlx5_flow_rule */
 struct fs_fte {
 	struct fs_node			node;
 	struct mlx5_fs_dr_rule		fs_dr_rule;
 	u32				val[MLX5_ST_SZ_DW_MATCH_PARAM];
-	u32				dests_size;
-	u32				fwd_dests;
+	struct fs_fte_action		act_dests;
+	struct fs_fte_dup		*dup;
 	u32				index;
-	struct mlx5_flow_context	flow_context;
-	struct mlx5_flow_act		action;
 	enum fs_fte_status		status;
-	struct mlx5_fc			*counter;
 	struct rhash_head		hash;
-	int				modify_mask;
 };
 
 /* Type of children is mlx5_flow_table/namespace */
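The FS_FT_MAX_TYPE bump pairs with the BUILD_BUG_ON_ZERO() arm of the capability macro extended just below: add a new fs_flow_table_type without teaching the macro about it and the build fails. The trick relies on a negative-width bitfield; a toy version:

/* Evaluates to 0, but fails to compile when "e" is true. */
#define TOY_BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))

enum toy_ft_type { TOY_FT_RX, TOY_FT_TX, TOY_FT_MAX = TOY_FT_TX };

#define TOY_FT_CAP(type)					\
	((type) == TOY_FT_RX ? 1 :				\
	 (type) == TOY_FT_TX ? 2 :				\
	 TOY_BUILD_BUG_ON_ZERO(TOY_FT_TX != TOY_FT_MAX))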
@@ -368,7 +380,9 @@ struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
 	(type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) :		\
 	(type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) :		\
 	(type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) :	\
-	(BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\
+	(type == FS_FT_FDB_RX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) :		\
+	(type == FS_FT_FDB_TX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) :		\
+	(BUILD_BUG_ON_ZERO(FS_FT_FDB_TX != FS_FT_MAX_TYPE))\
 	)
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index b61b7d966114..76ad46bf477d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -224,6 +224,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, mcam_reg)) {
 		mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128);
 		mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F);
+		mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9180_0x91FF);
 	}
 
 	if (MLX5_CAP_GEN(dev, qcam_reg))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 979c49ae6b5c..4f55e55ecb55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -26,6 +26,7 @@ struct mlx5_fw_reset {
 	struct work_struct reset_now_work;
 	struct work_struct reset_abort_work;
 	unsigned long reset_flags;
+	u8 reset_method;
 	struct timer_list timer;
 	struct completion done;
 	int ret;
@@ -95,7 +96,7 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
 }
 
 static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
-			       u8 *reset_type, u8 *reset_state)
+			       u8 *reset_type, u8 *reset_state, u8 *reset_method)
 {
 	u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
 	u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
@@ -111,13 +112,26 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
 		*reset_type = MLX5_GET(mfrl_reg, out, reset_type);
 	if (reset_state)
 		*reset_state = MLX5_GET(mfrl_reg, out, reset_state);
+	if (reset_method)
+		*reset_method = MLX5_GET(mfrl_reg, out, pci_reset_req_method);
 
 	return 0;
 }
 
 int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
 {
-	return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
+	return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL);
+}
+
+static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev,
+					   u8 *reset_method)
+{
+	if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method)) {
+		*reset_method = MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE;
+		return 0;
+	}
+
+	return mlx5_reg_mfrl_query(dev, NULL, NULL, NULL, reset_method);
 }
 
 static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
@@ -125,7 +139,7 @@ static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
 {
 	u8 reset_state;
 
-	if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+	if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state, NULL))
 		goto out;
 
 	if (!reset_state)
@@ -207,6 +221,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
 static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded)
 {
 	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+	struct devlink *devlink = priv_to_devlink(dev);
 
 	/* if this is the driver that initiated the fw reset, devlink completed the reload */
 	if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
@@ -218,9 +233,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload
 			mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
 		else
 			mlx5_load_one(dev, true);
-		devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
+		devl_lock(devlink);
+		devlink_remote_reload_actions_performed(devlink, 0,
 							BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
 							BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
+		devl_unlock(devlink);
 	}
 }
 
@@ -395,7 +412,8 @@ static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id)
 	return 0;
 }
 
-static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev)
+static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev,
+				      u8 reset_method)
 {
 	u16 dev_id;
 	int err;
@@ -406,9 +424,11 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev)
 	}
 
 #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
-	err = mlx5_check_hotplug_interrupt(dev);
-	if (err)
-		return false;
+	if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) {
+		err = mlx5_check_hotplug_interrupt(dev);
+		if (err)
+			return false;
+	}
 #endif
 
 	err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
@@ -424,8 +444,12 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
 	struct mlx5_core_dev *dev = fw_reset->dev;
 	int err;
 
-	if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
-	    !mlx5_is_reset_now_capable(dev)) {
+	err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method);
+	if (err)
+		mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err);
+
+	if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
+	    !mlx5_is_reset_now_capable(dev, fw_reset->reset_method)) {
 		err = mlx5_fw_reset_set_reset_sync_nack(dev);
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
 			       err ? "Failed" : "Sent");
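mlx5_fw_reset_get_reset_method() above is a query-with-fallback: when the hot-reset capability bit is absent it never touches MFRL and simply reports link-toggle, the method every firmware supports. The shape of that pattern, reduced to a sketch with all names hypothetical:

static int toy_get_reset_method(const struct toy_dev *dev, u8 *method)
{
	if (!toy_has_cap(dev, TOY_CAP_HOTRESET)) {	/* hypothetical cap */
		*method = TOY_RESET_LINK_TOGGLE;	/* safe default */
		return 0;
	}

	return toy_query_reset_reg(dev, method);	/* hypothetical query */
}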
Device reset is expected.\n"); } -static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) +static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id) { struct pci_bus *bridge_bus = dev->pdev->bus; struct pci_dev *bridge = bridge_bus->self; unsigned long timeout; struct pci_dev *sdev; - u16 reg16, dev_id; int cap, err; + u16 reg16; - err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); - if (err) - return pcibios_err_to_errno(err); - err = mlx5_check_dev_ids(dev, dev_id); - if (err) - return err; cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); if (!cap) return -EOPNOTSUPP; @@ -525,6 +543,44 @@ restore: return err; } +static int mlx5_pci_reset_bus(struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method)) + return -EOPNOTSUPP; + + return pci_reset_bus(dev->pdev); +} + +static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method) +{ + u16 dev_id; + int err; + + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return pcibios_err_to_errno(err); + err = mlx5_check_dev_ids(dev, dev_id); + if (err) + return err; + + switch (reset_method) { + case MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE: + err = mlx5_pci_link_toggle(dev, dev_id); + if (err) + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed\n"); + break; + case MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET: + err = mlx5_pci_reset_bus(dev); + if (err) + mlx5_core_warn(dev, "mlx5_pci_reset_bus failed\n"); + break; + default: + return -EOPNOTSUPP; + } + + return err; +} + static void mlx5_sync_reset_now_event(struct work_struct *work) { struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, @@ -543,9 +599,9 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) goto done; } - err = mlx5_pci_link_toggle(dev); + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); if (err) { - mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); } @@ -607,9 +663,9 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) mlx5_core_warn(dev, "Sync Reset, got reset action. 
rst_state = %u\n", rst_state); if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { - err = mlx5_pci_link_toggle(dev); + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); if (err) { - mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err); + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err); fw_reset->ret = err; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 26f8a11b8906..9772327d5124 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -74,7 +74,7 @@ static int mlx5i_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = mlx5i_epriv(dev); - return mlx5e_ethtool_set_ringparam(priv, param); + return mlx5e_ethtool_set_ringparam(priv, param, extack); } static void mlx5i_get_ringparam(struct net_device *dev, @@ -132,7 +132,7 @@ static int mlx5i_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal, extack); } static int mlx5i_get_ts_info(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index f7b01b3f0cba..1477db7f5307 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -48,6 +48,7 @@ static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { struct irq_affinity_desc auto_desc = {}; + struct mlx5_irq *irq; u32 irq_index; int err; @@ -64,9 +65,12 @@ irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_de else cpu_get(pool, cpumask_first(&af_desc->mask)); } - return mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, - NULL); + irq = mlx5_irq_alloc(pool, irq_index, + cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + NULL); + if (IS_ERR(irq)) + xa_erase(&pool->irqs, irq_index); + return irq; } /* Looking for the IRQ with the smallest refcount that fits req_mask. 
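The irq_affinity.c hunk above fixes an error-path leak: an IRQ index is first reserved in the pool's xarray, and only then is mlx5_irq_alloc() called, so a failed allocation must erase the reserved index before the error is propagated. Below is a minimal, self-contained C sketch of the same reserve-then-roll-back pattern; the slot_pool type and every name in it are hypothetical stand-ins for the kernel's xarray-based pool, not the driver's actual API.

#include <stdio.h>
#include <stdlib.h>

#define POOL_SIZE 8

struct slot_pool {
	void *slots[POOL_SIZE]; /* NULL means the slot is free */
};

/* Reserve the first free index, as the kernel's xa_alloc() would. */
static int pool_reserve(struct slot_pool *p)
{
	for (int i = 0; i < POOL_SIZE; i++) {
		if (!p->slots[i]) {
			p->slots[i] = (void *)1; /* placeholder reservation */
			return i;
		}
	}
	return -1;
}

/* Release a reserved index, like the xa_erase() added by the fix. */
static void pool_erase(struct slot_pool *p, int idx)
{
	p->slots[idx] = NULL;
}

/* Stand-in for mlx5_irq_alloc(); fails once to exercise the error path. */
static void *object_alloc(void)
{
	static int calls;

	return (++calls == 2) ? NULL : malloc(16);
}

static void *pool_request(struct slot_pool *p)
{
	int idx = pool_reserve(p);
	void *obj;

	if (idx < 0)
		return NULL;

	obj = object_alloc();
	if (!obj) {
		/* The bug being fixed: returning here without erasing the
		 * reservation would leak the index forever.
		 */
		pool_erase(p, idx);
		return NULL;
	}
	p->slots[idx] = obj;
	return obj;
}

int main(void)
{
	struct slot_pool pool = { { NULL } };

	for (int i = 0; i < 4; i++)
		printf("request %d -> %p\n", i, pool_request(&pool));
	return 0;
}

After the failing second request, the third request reuses the freed index, which is exactly what the added xa_erase() guarantees for the IRQ pool.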
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index d0871c46b8c5..8577db3308cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -445,6 +445,34 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); } +static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev) +{ + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; + unsigned long flags; + int i; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + + if (!ldev) + goto unlock; + + for (i = 0; i < ldev->ports; i++) + if (ldev->tracker.netdev_state[i].tx_enabled) + ndev = ldev->pf[i].netdev; + if (!ndev) + ndev = ldev->pf[ldev->ports - 1].netdev; + + if (ndev) + dev_hold(ndev); + +unlock: + spin_unlock_irqrestore(&lag_lock, flags); + + return ndev; +} + void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { @@ -477,9 +505,18 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, } } - if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && - !(ldev->mode == MLX5_LAG_MODE_ROCE)) - mlx5_lag_drop_rule_setup(ldev, tracker); + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0); + + if (!(ldev->mode == MLX5_LAG_MODE_ROCE)) + mlx5_lag_drop_rule_setup(ldev, tracker); + /* Only SR-IOV and RoCE LAG set tracker->tx_type, so there is + * no need to check the mode here. + */ + blocking_notifier_call_chain(&dev0->priv.lag_nh, + MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, + ndev); + } } static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, @@ -613,6 +650,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, mlx5_core_err(dev0, "Failed to deactivate RoCE LAG; driver restart required\n"); } + BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh); return err; } @@ -1492,38 +1530,6 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev) mlx5_queue_bond_work(ldev, 0); } -struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) -{ - struct net_device *ndev = NULL; - struct mlx5_lag *ldev; - unsigned long flags; - int i; - - spin_lock_irqsave(&lag_lock, flags); - ldev = mlx5_lag_dev(dev); - - if (!(ldev && __mlx5_lag_is_roce(ldev))) - goto unlock; - - if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { - for (i = 0; i < ldev->ports; i++) - if (ldev->tracker.netdev_state[i].tx_enabled) - ndev = ldev->pf[i].netdev; - if (!ndev) - ndev = ldev->pf[ldev->ports - 1].netdev; - } else { - ndev = ldev->pf[MLX5_LAG_P1].netdev; - } - if (ndev) - dev_hold(ndev); - -unlock: - spin_unlock_irqrestore(&lag_lock, flags); - - return ndev; -} -EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); - u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave) { @@ -1538,7 +1544,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + if (ldev->pf[i].netdev == slave) { port = i; break; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0361741632a6..b306ae79bf97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -38,6 +38,10 @@ #include "lib/eq.h" #include "en.h" #include "clock.h" +#ifdef CONFIG_X86 +#include <linux/timekeeping.h> +#include
<linux/cpufeature.h> +#endif /* CONFIG_X86 */ enum { MLX5_PIN_MODE_IN = 0x0, @@ -148,6 +152,87 @@ static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size) MLX5_REG_MTUTC, 0, 1); } +#ifdef CONFIG_X86 +static bool mlx5_is_ptm_source_time_available(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mtptm_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtptm_reg)] = {0}; + int err; + + if (!MLX5_CAP_MCAM_REG3(dev, mtptm)) + return false; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MTPTM, + 0, 0); + if (err) + return false; + + return !!MLX5_GET(mtptm_reg, out, psta); +} + +static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, + struct system_counterval_t *sys_counterval, + void *ctx) +{ + u32 out[MLX5_ST_SZ_DW(mtctr_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtctr_reg)] = {0}; + struct mlx5_core_dev *mdev = ctx; + bool real_time_mode; + u64 host, device; + int err; + + real_time_mode = mlx5_real_time_mode(mdev); + + MLX5_SET(mtctr_reg, in, first_clock_timestamp_request, + MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK); + MLX5_SET(mtctr_reg, in, second_clock_timestamp_request, + real_time_mode ? MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK : + MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_MTCTR, + 0, 0); + if (err) + return err; + + if (!MLX5_GET(mtctr_reg, out, first_clock_valid) || + !MLX5_GET(mtctr_reg, out, second_clock_valid)) + return -EINVAL; + + host = MLX5_GET64(mtctr_reg, out, first_clock_timestamp); + *sys_counterval = (struct system_counterval_t) { + .cycles = host, + .cs_id = CSID_X86_ART, + .use_nsecs = true, + }; + + device = MLX5_GET64(mtctr_reg, out, second_clock_timestamp); + if (real_time_mode) + *device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX)); + else + *device_time = mlx5_timecounter_cyc2time(&mdev->clock, device); + + return 0; +} + +static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct system_time_snapshot history_begin = {0}; + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + if (!mlx5_is_ptm_source_time_available(mdev)) + return -EBUSY; + + ktime_get_snapshot(&history_begin); + + return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, + &history_begin, cts); +} +#endif /* CONFIG_X86 */ + static u64 mlx5_read_time(struct mlx5_core_dev *dev, struct ptp_system_timestamp *sts, bool real_time) @@ -1034,6 +1119,12 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) if (MLX5_CAP_MCAM_REG(mdev, mtutc)) mlx5_init_timer_max_freq_adjustment(mdev); +#ifdef CONFIG_X86 + if (MLX5_CAP_MCAM_REG3(mdev, mtptm) && + MLX5_CAP_MCAM_REG3(mdev, mtctr) && boot_cpu_has(X86_FEATURE_ART)) + clock->ptp_info.getcrosststamp = mlx5_ptp_getcrosststamp; +#endif /* CONFIG_X86 */ + mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); mlx5_init_overflow_period(clock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c index 234cd00f71a1..b7d4b1a2baf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -386,7 +386,8 @@ static int ipsec_fs_roce_tx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if 
(!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } @@ -455,7 +456,8 @@ static int ipsec_fs_roce_rx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index f6deb5a3f820..eeb0b7ea05f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -126,7 +126,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) } static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, - u8 *host_buses, u8 *sd_group) + u8 *host_buses) { u32 out[MLX5_ST_SZ_DW(mpir_reg)]; int err; @@ -135,10 +135,6 @@ static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, if (err) return err; - err = mlx5_query_nic_vport_sd_group(dev, sd_group); - if (err) - return err; - *sdm = MLX5_GET(mpir_reg, out, sdm); *host_buses = MLX5_GET(mpir_reg, out, host_buses); @@ -166,19 +162,23 @@ static int sd_init(struct mlx5_core_dev *dev) if (mlx5_core_is_ecpf(dev)) return 0; + err = mlx5_query_nic_vport_sd_group(dev, &sd_group); + if (err) + return err; + + if (!sd_group) + return 0; + if (!MLX5_CAP_MCAM_REG(dev, mpir)) return 0; - err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group); + err = mlx5_query_sd(dev, &sdm, &host_buses); if (err) return err; if (!sdm) return 0; - if (!sd_group) - return 0; - group_id = mlx5_sd_group_id(dev, sd_group); if (!mlx5_sd_is_supported(dev, host_buses)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 527da58c7953..220a9ac75c8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -454,8 +454,8 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx) static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) { + bool do_set = false, mem_page_fault = false; void *set_hca_cap; - bool do_set = false; int err; if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || @@ -470,6 +470,17 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur, MLX5_ST_SZ_BYTES(odp_cap)); + /* For best performance, enable memory scheme ODP only when + * it has page prefetch enabled. 
+ */ + if (MLX5_CAP_ODP_MAX(dev, mem_page_fault) && + MLX5_CAP_ODP_MAX(dev, memory_page_fault_scheme_cap.page_prefetch)) { + mem_page_fault = true; + do_set = true; + MLX5_SET(odp_cap, set_hca_cap, mem_page_fault, mem_page_fault); + goto set; + } + #define ODP_CAP_SET_MAX(dev, field) \ do { \ u32 _res = MLX5_CAP_ODP_MAX(dev, field); \ @@ -479,25 +490,28 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) } \ } while (0) - ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive); - ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive); - ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive); - ODP_CAP_SET_MAX(dev, xrc_odp_caps.send); - ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive); - ODP_CAP_SET_MAX(dev, xrc_odp_caps.write); - ODP_CAP_SET_MAX(dev, xrc_odp_caps.read); - ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic); - ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive); - ODP_CAP_SET_MAX(dev, dc_odp_caps.send); - ODP_CAP_SET_MAX(dev, dc_odp_caps.receive); - ODP_CAP_SET_MAX(dev, dc_odp_caps.write); - ODP_CAP_SET_MAX(dev, dc_odp_caps.read); - ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic); - - if (!do_set) - return 0; - - return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.ud_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.rc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.send); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.write); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.read); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.atomic); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.send); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.write); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.read); + ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.atomic); + +set: + if (do_set) + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); + + mlx5_core_dbg(dev, "Using ODP %s scheme\n", + mem_page_fault ? 
"memory" : "transport"); + return err; } static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) @@ -619,6 +633,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload)) MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_with_driver_unload, 1); + if (MLX5_CAP_GEN_MAX(dev, pcie_reset_using_hotreset_method)) + MLX5_SET(cmd_hca_cap, set_hca_cap, + pcie_reset_using_hotreset_method, 1); if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) MLX5_SET(cmd_hca_cap, @@ -923,6 +940,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, } mlx5_pci_vsc_init(dev); + + err = pci_enable_ptm(pdev, NULL); + if (err) + mlx5_core_info(dev, "PTM is not supported by PCIe\n"); + return 0; err_clr_master: @@ -939,6 +961,7 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev) * before removing the pci bars */ mlx5_drain_health_wq(dev); + pci_disable_ptm(dev->pdev); iounmap(dev->iseg); release_bar(dev->pdev); mlx5_pci_disable_device(dev); @@ -2142,7 +2165,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) /* Panic tear down fw command will stop the PCI bus communication * with the HCA, so the health poll is no longer needed. */ - mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); ret = mlx5_cmd_fast_teardown_hca(dev); @@ -2177,6 +2199,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + mlx5_drain_health_wq(dev); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev, false); @@ -2217,6 +2240,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ + { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index d894a88fa9f2..972e8e9df585 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -608,6 +608,11 @@ enum { RELEASE_ALL_PAGES_MASK = 0x4000, }; +/* This limit is based on the capability of the firmware as it cannot release + * more than 50000 back to the host in one go. + */ +#define MAX_RECLAIM_NPAGES (-50000) + static int req_pages_handler(struct notifier_block *nb, unsigned long type, void *data) { @@ -639,7 +644,16 @@ static int req_pages_handler(struct notifier_block *nb, req->dev = dev; req->func_id = func_id; - req->npages = npages; + + /* npages > 0 means HCA asking host to allocate/give pages, + * npages < 0 means HCA asking host to reclaim back the pages allocated. + * Here we are restricting the maximum number of pages that can be + * reclaimed to be MAX_RECLAIM_NPAGES. Note that MAX_RECLAIM_NPAGES is + * a negative value. + * Since MAX_RECLAIM is negative, we are using max() to restrict + * req->npages (and not min ()). 
+ */ + req->npages = max_t(s32, npages, MAX_RECLAIM_NPAGES); req->ec_function = ec_function; req->release_all = release_all; INIT_WORK(&req->work, pages_work_handler); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c index 8bce730b5c5b..db2bd3ad63ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -28,6 +28,9 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP)) + return -EOPNOTSUPP; + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP); @@ -44,6 +47,10 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; void *attr; + if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR) || + !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR)) + return -EOPNOTSUPP; + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index b2986175d9af..b706f1486504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -112,6 +112,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) struct mlx5_core_dev *mdev = sf_dev->mdev; set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state); + mlx5_drain_health_wq(mdev); mlx5_unload_one(mdev, false); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 8c2a34a0d6be..baefb9a3fa05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -251,9 +251,9 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level); output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out, - flow_table_context.sw_owner_icm_root_1); + flow_table_context.sws.sw_owner_icm_root_1); output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out, - flow_table_context.sw_owner_icm_root_0); + flow_table_context.sws.sw_owner_icm_root_0); return 0; } @@ -480,15 +480,15 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, */ if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, attr->icm_addr_rx); + sws.sw_owner_icm_root_0, attr->icm_addr_rx); } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, attr->icm_addr_tx); + sws.sw_owner_icm_root_0, attr->icm_addr_tx); } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, attr->icm_addr_rx); + sws.sw_owner_icm_root_0, attr->icm_addr_rx); MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_1, attr->icm_addr_tx); + sws.sw_owner_icm_root_1, attr->icm_addr_tx); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 042ca0349124..d1db04baa1fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -7,7 +7,7 @@ /* don't try to optimize STE allocation if the stack is too constraining */ #define DR_RULE_MAX_STES_OPTIMIZED 0 #else -#define DR_RULE_MAX_STES_OPTIMIZED 5 +#define DR_RULE_MAX_STES_OPTIMIZED 2 #endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 50c2554c9ccf..833cb68c744f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -9,14 +9,6 @@ #include "fs_dr.h" #include "dr_types.h" -static bool dr_is_fw_term_table(struct mlx5_flow_table *ft) -{ - if (ft->flags & MLX5_FLOW_TABLE_TERMINATION) - return true; - - return false; -} - static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, @@ -70,7 +62,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, u32 flags; int err; - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, ft_attr, next_ft); @@ -110,7 +102,7 @@ static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5dr_action *action = ft->fs_dr_table.miss_action; int err; - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table); @@ -135,7 +127,7 @@ static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_table *next_ft) { - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft); return set_miss_action(ns, ft, next_ft); @@ -154,7 +146,7 @@ static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, match_criteria_enable); struct mlx5dr_match_parameters mask; - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, fg); @@ -179,7 +171,7 @@ static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *fg) { - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher); @@ -279,7 +271,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, int err = 0; int i; - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), @@ -306,12 +298,12 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, match_sz = sizeof(fte->val); /* Drop reformat action bit if destination vport set with reformat */ - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { list_for_each_entry(dst, &fte->node.children, node.list) { if (!contain_vport_reformat_action(dst)) continue; -
fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + fte->act_dests.action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; break; } } @@ -321,7 +313,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, * TX: modify header -> push vlan -> encap * RX: decap -> pop vlan -> modify header */ - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { enum mlx5dr_action_reformat_type decap_type = DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2; @@ -337,26 +329,26 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = tmp_action; } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { bool is_decap; - if (fte->action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { + if (fte->act_dests.action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { err = -EINVAL; mlx5dr_err(domain, "FW-owned reformat can't be used in SW rule\n"); goto free_actions; } - is_decap = fte->action.pkt_reformat->reformat_type == + is_decap = fte->act_dests.action.pkt_reformat->reformat_type == MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; if (is_decap) actions[num_actions++] = - fte->action.pkt_reformat->action.dr_action; + fte->act_dests.action.pkt_reformat->action.dr_action; else delay_encap_set = true; } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { tmp_action = mlx5dr_action_create_pop_vlan(); if (!tmp_action) { @@ -367,7 +359,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = tmp_action; } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { tmp_action = mlx5dr_action_create_pop_vlan(); if (!tmp_action) { @@ -378,12 +370,12 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = tmp_action; } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) actions[num_actions++] = - fte->action.modify_hdr->action.dr_action; + fte->act_dests.action.modify_hdr->action.dr_action; - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { - tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]); + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + tmp_action = create_action_push_vlan(domain, &fte->act_dests.action.vlan[0]); if (!tmp_action) { err = -ENOMEM; goto free_actions; @@ -392,8 +384,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = tmp_action; } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { - tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]); + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + tmp_action = create_action_push_vlan(domain, &fte->act_dests.action.vlan[1]); if (!tmp_action) { err = -ENOMEM; goto free_actions; @@ -404,11 +396,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (delay_encap_set) actions[num_actions++] = - fte->action.pkt_reformat->action.dr_action; + fte->act_dests.action.pkt_reformat->action.dr_action; /* The order of the actions below is not important */ - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + if 
(fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { tmp_action = mlx5dr_action_create_drop(); if (!tmp_action) { err = -ENOMEM; @@ -418,9 +410,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, term_actions[num_term_actions++].dest = tmp_action; } - if (fte->flow_context.flow_tag) { + if (fte->act_dests.flow_context.flow_tag) { tmp_action = - mlx5dr_action_create_tag(fte->flow_context.flow_tag); + mlx5dr_action_create_tag(fte->act_dests.flow_context.flow_tag); if (!tmp_action) { err = -ENOMEM; goto free_actions; @@ -429,7 +421,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = tmp_action; } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { list_for_each_entry(dst, &fte->node.children, node.list) { enum mlx5_flow_destination_type type = dst->dest_attr.type; u32 id; @@ -510,7 +502,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { list_for_each_entry(dst, &fte->node.children, node.list) { u32 id; @@ -537,19 +529,21 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } } - if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { - if (fte->action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) { + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + struct mlx5_flow_act *action = &fte->act_dests.action; + + if (fte->act_dests.action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) { err = -EOPNOTSUPP; goto free_actions; } tmp_action = mlx5dr_action_create_aso(domain, - fte->action.exe_aso.object_id, - fte->action.exe_aso.return_reg_id, - fte->action.exe_aso.type, - fte->action.exe_aso.flow_meter.init_color, - fte->action.exe_aso.flow_meter.meter_idx); + action->exe_aso.object_id, + action->exe_aso.return_reg_id, + action->exe_aso.type, + action->exe_aso.flow_meter.init_color, + action->exe_aso.flow_meter.meter_idx); if (!tmp_action) { err = -ENOMEM; goto free_actions; @@ -576,8 +570,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = term_actions->dest; } else if (num_term_actions > 1) { bool ignore_flow_level = - !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); - u32 flow_source = fte->flow_context.flow_source; + !!(fte->act_dests.action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + u32 flow_source = fte->act_dests.flow_context.flow_source; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { @@ -601,7 +595,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, ¶ms, num_actions, actions, - fte->flow_context.flow_source); + fte->act_dests.flow_context.flow_source); if (!rule) { err = -EINVAL; goto free_actions; @@ -740,7 +734,7 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, int err; int i; - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); err = mlx5dr_rule_destroy(rule->dr_rule); @@ -765,7 +759,7 @@ static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, struct fs_fte fte_tmp = {}; int ret; - if (dr_is_fw_term_table(ft)) + if (mlx5_fs_cmd_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte); /* Backup current dr rule details */ @@ -819,11 
+813,11 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, enum fs_flow_table_type ft_type) { - u32 steering_caps = 0; + u32 steering_caps = MLX5_FLOW_STEERING_CAP_DUPLICATE_MATCH; if (ft_type != FS_FT_FDB || MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5) - return 0; + return steering_caps; steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX; steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/Makefile new file mode 100644 index 000000000000..c78512eed8d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h new file mode 100644 index 000000000000..f39d636ff39a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -0,0 +1,926 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_H_ +#define MLX5HWS_H_ + +struct mlx5hws_context; +struct mlx5hws_table; +struct mlx5hws_matcher; +struct mlx5hws_rule; + +enum mlx5hws_table_type { + MLX5HWS_TABLE_TYPE_FDB, + MLX5HWS_TABLE_TYPE_MAX, +}; + +enum mlx5hws_matcher_resource_mode { + /* Allocate resources based on number of rules with minimal failure probability */ + MLX5HWS_MATCHER_RESOURCE_MODE_RULE, + /* Allocate fixed size hash table based on given column and rows */ + MLX5HWS_MATCHER_RESOURCE_MODE_HTABLE, +}; + +enum mlx5hws_action_type { + MLX5HWS_ACTION_TYP_LAST, + MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2, + MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2, + MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2, + MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3, + MLX5HWS_ACTION_TYP_DROP, + MLX5HWS_ACTION_TYP_MISS, + MLX5HWS_ACTION_TYP_TBL, + MLX5HWS_ACTION_TYP_CTR, + MLX5HWS_ACTION_TYP_TAG, + MLX5HWS_ACTION_TYP_MODIFY_HDR, + MLX5HWS_ACTION_TYP_VPORT, + MLX5HWS_ACTION_TYP_POP_VLAN, + MLX5HWS_ACTION_TYP_PUSH_VLAN, + MLX5HWS_ACTION_TYP_ASO_METER, + MLX5HWS_ACTION_TYP_INSERT_HEADER, + MLX5HWS_ACTION_TYP_REMOVE_HEADER, + MLX5HWS_ACTION_TYP_RANGE, + MLX5HWS_ACTION_TYP_SAMPLER, + MLX5HWS_ACTION_TYP_DEST_ARRAY, + MLX5HWS_ACTION_TYP_MAX, +}; + +enum mlx5hws_action_flags { + MLX5HWS_ACTION_FLAG_HWS_FDB = 1 << 0, + /* Shared action can be used over a few threads, since the + * data is written only once at the creation of the action. 
+ */ + MLX5HWS_ACTION_FLAG_SHARED = 1 << 1, +}; + +enum mlx5hws_action_aso_meter_color { + MLX5HWS_ACTION_ASO_METER_COLOR_RED = 0x0, + MLX5HWS_ACTION_ASO_METER_COLOR_YELLOW = 0x1, + MLX5HWS_ACTION_ASO_METER_COLOR_GREEN = 0x2, + MLX5HWS_ACTION_ASO_METER_COLOR_UNDEFINED = 0x3, +}; + +enum mlx5hws_send_queue_actions { + /* Start executing all pending queued rules */ + MLX5HWS_SEND_QUEUE_ACTION_DRAIN_ASYNC = 1 << 0, + /* Start executing all pending queued rules and wait till completion */ + MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC = 1 << 1, +}; + +struct mlx5hws_context_attr { + u16 queues; + u16 queue_size; + bool bwc; /* add support for backward-compatible API */ +}; + +struct mlx5hws_table_attr { + enum mlx5hws_table_type type; + u32 level; +}; + +enum mlx5hws_matcher_flow_src { + MLX5HWS_MATCHER_FLOW_SRC_ANY = 0x0, + MLX5HWS_MATCHER_FLOW_SRC_WIRE = 0x1, + MLX5HWS_MATCHER_FLOW_SRC_VPORT = 0x2, +}; + +enum mlx5hws_matcher_insert_mode { + MLX5HWS_MATCHER_INSERT_BY_HASH = 0x0, + MLX5HWS_MATCHER_INSERT_BY_INDEX = 0x1, +}; + +enum mlx5hws_matcher_distribute_mode { + MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH = 0x0, + MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR = 0x1, +}; + +struct mlx5hws_matcher_attr { + /* Processing priority inside table */ + u32 priority; + /* Provide all rules with unique rule_idx in num_log range to reduce locking */ + bool optimize_using_rule_idx; + /* Resource mode and corresponding size */ + enum mlx5hws_matcher_resource_mode mode; + /* Optimize insertion in case packet origin is the same for all rules */ + enum mlx5hws_matcher_flow_src optimize_flow_src; + /* Define the insertion and distribution modes for this matcher */ + enum mlx5hws_matcher_insert_mode insert_mode; + enum mlx5hws_matcher_distribute_mode distribute_mode; + /* Define whether the created matcher supports resizing into a bigger matcher */ + bool resizable; + union { + struct { + u8 sz_row_log; + u8 sz_col_log; + } table; + + struct { + u8 num_log; + } rule; + }; + /* Optional AT attach configuration - Max number of additional AT */ + u8 max_num_of_at_attach; +}; + +struct mlx5hws_rule_attr { + void *user_data; + /* Valid if matcher optimize_using_rule_idx is set or + * if matcher is configured to insert rules by index. + */ + u32 rule_idx; + u32 flow_source; + u16 queue_id; + u32 burst:1; +}; + +/* In actions that take offset, the offset is unique, pointing to a single + * resource and the user should not reuse the same index because data changing + * is not atomic. + */ +struct mlx5hws_rule_action { + struct mlx5hws_action *action; + union { + struct { + u32 value; + } tag; + + struct { + u32 offset; + } counter; + + struct { + u32 offset; + u8 *data; + } modify_header; + + struct { + u32 offset; + u8 hdr_idx; + u8 *data; + } reformat; + + struct { + __be32 vlan_hdr; + } push_vlan; + + struct { + u32 offset; + enum mlx5hws_action_aso_meter_color init_color; + } aso_meter; + }; +}; + +struct mlx5hws_action_reformat_header { + size_t sz; + void *data; +}; + +struct mlx5hws_action_insert_header { + struct mlx5hws_action_reformat_header hdr; + /* PRM start anchor to which header will be inserted */ + u8 anchor; + /* Header insertion offset in bytes, from the start + * anchor to the location where new header will be inserted. + */ + u8 offset; + /* Indicates this header insertion adds an encapsulation header to the packet, + * requiring the device to update offloaded fields (for example IPv4 total length).
+ */ + bool encap; +}; + +struct mlx5hws_action_remove_header_attr { + /* PRM start anchor from which header will be removed */ + u8 anchor; + /* Header remove offset in bytes, from the start + * anchor to the location where remove header starts. + */ + u8 offset; + /* Indicates the removed header size in bytes */ + size_t size; +}; + +struct mlx5hws_action_mh_pattern { + /* Byte size of modify actions provided by "data" */ + size_t sz; + /* PRM format modify actions pattern */ + __be64 *data; +}; + +struct mlx5hws_action_dest_attr { + /* Required destination action to forward the packet */ + struct mlx5hws_action *dest; + /* Optional reformat action */ + struct mlx5hws_action *reformat; +}; + +/** + * mlx5hws_is_supported - Check whether HWS is supported + * + * @mdev: The device to check. + * + * Return: true if supported, false otherwise. + */ +static inline bool mlx5hws_is_supported(struct mlx5_core_dev *mdev) +{ + u8 ignore_flow_level_rtc_valid; + u8 wqe_based_flow_table_update; + + wqe_based_flow_table_update = + MLX5_CAP_GEN(mdev, wqe_based_flow_table_update_cap); + ignore_flow_level_rtc_valid = + MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.ignore_flow_level_rtc_valid); + + return wqe_based_flow_table_update && ignore_flow_level_rtc_valid; +} + +/** + * mlx5hws_context_open - Open a context used for direct rule insertion + * using hardware steering. + * + * @mdev: The device to be used for HWS. + * @attr: Attributes used for context open. + * + * Return: pointer to mlx5hws_context on success NULL otherwise. + */ +struct mlx5hws_context * +mlx5hws_context_open(struct mlx5_core_dev *mdev, + struct mlx5hws_context_attr *attr); + +/** + * mlx5hws_context_close - Close a context used for direct hardware steering. + * + * @ctx: mlx5hws context to close. + * + * Return: zero on success non zero otherwise. + */ +int mlx5hws_context_close(struct mlx5hws_context *ctx); + +/** + * mlx5hws_context_set_peer - Set a peer context. + * Each context can have multiple contexts as peers. + * + * @ctx: The context to which peer_ctx will be peered. + * @peer_ctx: The peer context. + * @peer_vhca_id: The peer context vhca id. + */ +void mlx5hws_context_set_peer(struct mlx5hws_context *ctx, + struct mlx5hws_context *peer_ctx, + u16 peer_vhca_id); + +/** + * mlx5hws_table_create - Create a new direct rule table. + * Each table can contain multiple matchers. + * + * @ctx: The context in which the new table will be opened. + * @attr: Attributes used for table creation. + * + * Return: pointer to mlx5hws_table on success NULL otherwise. + */ +struct mlx5hws_table * +mlx5hws_table_create(struct mlx5hws_context *ctx, + struct mlx5hws_table_attr *attr); + +/** + * mlx5hws_table_destroy - Destroy direct rule table. + * + * @tbl: Table to destroy. + * + * Return: zero on success non zero otherwise. + */ +int mlx5hws_table_destroy(struct mlx5hws_table *tbl); + +/** + * mlx5hws_table_get_id() - Get ID of the flow table. + * + * @tbl: Table to get ID of. + * + * Return: ID of the table. + */ +u32 mlx5hws_table_get_id(struct mlx5hws_table *tbl); + +/** + * mlx5hws_table_set_default_miss - Set default miss table for mlx5hws_table + * by using another mlx5hws_table. + * Traffic which all table matchers miss will be forwarded to the miss table. + * + * @tbl: Source table + * @miss_tbl: Target (miss) table, or NULL to remove current miss table + * + * Return: zero on success non zero otherwise.
+ */ +int mlx5hws_table_set_default_miss(struct mlx5hws_table *tbl, + struct mlx5hws_table *miss_tbl); + +/** + * mlx5hws_match_template_create - Create a new match template based on items mask. + * The match template will be used for matcher creation. + * + * @ctx: The context in which the new template will be created. + * @match_param: Describe the mask based on PRM match parameters. + * @match_param_sz: Size of match param buffer. + * @match_criteria_enable: Bitmap for each sub-set in match_criteria buffer. + * + * Return: Pointer to mlx5hws_match_template on success, NULL otherwise. + */ +struct mlx5hws_match_template * +mlx5hws_match_template_create(struct mlx5hws_context *ctx, + u32 *match_param, + u32 match_param_sz, + u8 match_criteria_enable); + +/** + * mlx5hws_match_template_destroy - Destroy a match template. + * + * @mt: Match template to destroy. + * + * Return: Zero on success, non-zero otherwise. + */ +int mlx5hws_match_template_destroy(struct mlx5hws_match_template *mt); + +/** + * mlx5hws_action_template_create - Create a new action template based on an action_type array. + * + * @action_type: An array of actions based on the order of actions which will be provided + * with rule_actions to mlx5hws_rule_create. The last action is marked + * using MLX5HWS_ACTION_TYP_LAST. + * + * Return: Pointer to mlx5hws_action_template on success, NULL otherwise. + */ +struct mlx5hws_action_template * +mlx5hws_action_template_create(enum mlx5hws_action_type action_type[]); + +/** + * mlx5hws_action_template_destroy - Destroy action template. + * + * @at: Action template to destroy. + * + * Return: zero on success non zero otherwise. + */ +int mlx5hws_action_template_destroy(struct mlx5hws_action_template *at); + +/** + * mlx5hws_matcher_create - Create a new direct rule matcher. + * + * Each matcher can contain multiple rules. Matchers on the table will be + * processed by priority. Matching fields and mask are described by the + * match template. In some cases, multiple match templates can be used on + * the same matcher. + * + * @table: The table in which the new matcher will be opened. + * @mt: Array of match templates to be used on matcher. + * @num_of_mt: Number of match templates in mt array. + * @at: Array of action templates to be used on matcher. + * @num_of_at: Number of action templates in at array. + * @attr: Attributes used for matcher creation. + * + * Return: Pointer to mlx5hws_matcher on success, NULL otherwise. + * + */ +struct mlx5hws_matcher * +mlx5hws_matcher_create(struct mlx5hws_table *table, + struct mlx5hws_match_template *mt[], + u8 num_of_mt, + struct mlx5hws_action_template *at[], + u8 num_of_at, + struct mlx5hws_matcher_attr *attr); + +/** + * mlx5hws_matcher_destroy - Destroy a direct rule matcher. + * + * @matcher: Matcher to destroy. + * + * Return: Zero on success, non-zero otherwise. + */ +int mlx5hws_matcher_destroy(struct mlx5hws_matcher *matcher); + +/** + * mlx5hws_matcher_attach_at - Attach a new action template to a direct rule matcher. + * + * @matcher: Matcher to attach the action template to. + * @at: Action template to be attached to the matcher. + * + * Return: Zero on success, non-zero otherwise. + */ +int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, + struct mlx5hws_action_template *at); + +/** + * mlx5hws_matcher_resize_set_target - Link two matchers and enable moving rules. 
+ * + * Both matchers must be in the same table type, must be created with the + * 'resizable' property, and should have the same characteristics (e.g., same + * match templates and action templates). It is the user's responsibility to + * ensure that the destination matcher is allocated with the appropriate size. + * + * Once the function is completed, the user is: + * - Allowed to move rules from the source into the destination matcher. + * - No longer allowed to insert rules into the source matcher. + * + * The user is always allowed to insert rules into the destination matcher and + * to delete rules from any matcher. + * + * @src_matcher: Source matcher for moving rules from. + * @dst_matcher: Destination matcher for moving rules to. + * + * Return: Zero on successful move, non-zero otherwise. + */ +int mlx5hws_matcher_resize_set_target(struct mlx5hws_matcher *src_matcher, + struct mlx5hws_matcher *dst_matcher); + +/** + * mlx5hws_matcher_resize_rule_move - Enqueue moving rule operation. + * + * This function enqueues the operation of moving a rule from the source + * matcher to the destination matcher. + * + * @src_matcher: Matcher that the rule belongs to. + * @rule: The rule to move. + * @attr: Rule attributes. + * + * Return: Zero on success, non-zero otherwise. + */ +int mlx5hws_matcher_resize_rule_move(struct mlx5hws_matcher *src_matcher, + struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr); + +/** + * mlx5hws_rule_create - Enqueue create rule operation. + * + * @matcher: The matcher in which the new rule will be created. + * @mt_idx: Match template index to create the match with. + * @match_param: The match parameter PRM buffer used for value matching. + * @at_idx: Action template index to apply the actions with. + * @rule_actions: Rule actions to be executed on match. + * @attr: Rule creation attributes. + * @rule_handle: A valid rule handle. The handle doesn't require any initialization. + * + * Return: Zero on successful enqueue, non-zero otherwise. + */ +int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, + u8 mt_idx, + u32 *match_param, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[], + struct mlx5hws_rule_attr *attr, + struct mlx5hws_rule *rule_handle); + +/** + * mlx5hws_rule_destroy - Enqueue destroy rule operation. + * + * @rule: The rule destruction to enqueue. + * @attr: Rule destruction attributes. + * + * Return: Zero on successful enqueue, non-zero otherwise. + */ +int mlx5hws_rule_destroy(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr); + +/** + * mlx5hws_rule_action_update - Enqueue update actions on an existing rule. + * + * @rule: A valid rule handle to update. + * @at_idx: Action template index to update the actions with. + * @rule_actions: Rule actions to be executed on match. + * @attr: Rule update attributes. + * + * Return: Zero on successful enqueue, non-zero otherwise. + */ +int mlx5hws_rule_action_update(struct mlx5hws_rule *rule, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[], + struct mlx5hws_rule_attr *attr); + +/** + * mlx5hws_action_get_type - Get action type. + * + * @action: The action to get the type of. + * + * Return: action type. + */ +enum mlx5hws_action_type +mlx5hws_action_get_type(struct mlx5hws_action *action); + +/** + * mlx5hws_action_create_dest_drop - Create a direct rule drop action. + * + * @ctx: The context in which the new action will be created. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: Pointer to mlx5hws_action on success, NULL otherwise. 
+ */ +struct mlx5hws_action * +mlx5hws_action_create_dest_drop(struct mlx5hws_context *ctx, + u32 flags); + +/** + * mlx5hws_action_create_default_miss - Create a direct rule default miss action. + * Defaults are RX: Drop, TX: Wire. + * + * @ctx: The context in which the new action will be created. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: Pointer to mlx5hws_action on success, NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_default_miss(struct mlx5hws_context *ctx, + u32 flags); + +/** + * mlx5hws_action_create_dest_table - Create direct rule goto table action. + * + * @ctx: The context in which the new action will be created. + * @tbl: Destination table. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_dest_table(struct mlx5hws_context *ctx, + struct mlx5hws_table *tbl, + u32 flags); + +/** + * mlx5hws_action_create_dest_table_num - Create direct rule goto table number action. + * + * @ctx: The context in which the new action will be created. + * @tbl_num: Destination table number. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_dest_table_num(struct mlx5hws_context *ctx, + u32 tbl_num, u32 flags); + +/** + * mlx5hws_action_create_dest_match_range - Create direct rule range match action. + * + * @ctx: The context in which the new action will be created. + * @field: Field to compare the value against. + * @hit_ft: Flow table to go to on hit. + * @miss_ft: Flow table to go to on miss. + * @min: Minimal value of the field to be considered as hit. + * @max: Maximal value of the field to be considered as hit. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, + u32 field, + struct mlx5_flow_table *hit_ft, + struct mlx5_flow_table *miss_ft, + u32 min, u32 max, u32 flags); + +/** + * mlx5hws_action_create_flow_sampler - Create direct rule flow sampler action. + * + * @ctx: The context in which the new action will be created. + * @sampler_id: Flow sampler object ID. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_flow_sampler(struct mlx5hws_context *ctx, + u32 sampler_id, u32 flags); + +/** + * mlx5hws_action_create_dest_vport - Create direct rule goto vport action. + * + * @ctx: The context in which the new action will be created. + * @vport_num: Destination vport number. + * @vhca_id_valid: Tells if the vhca_id parameter is valid. + * @vhca_id: VHCA ID of the destination vport. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_dest_vport(struct mlx5hws_context *ctx, + u16 vport_num, + bool vhca_id_valid, + u16 vhca_id, + u32 flags); + +/** + * mlx5hws_action_create_tag - Create direct rule TAG action. + * + * @ctx: The context in which the new action will be created. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise.
+ */ +struct mlx5hws_action * +mlx5hws_action_create_tag(struct mlx5hws_context *ctx, u32 flags); + +/** + * mlx5hws_action_create_counter - Create direct rule counter action. + * + * @ctx: The context in which the new action will be created. + * @obj_id: Direct rule counter object ID. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_counter(struct mlx5hws_context *ctx, + u32 obj_id, + u32 flags); + +/** + * mlx5hws_action_create_reformat - Create direct rule reformat action. + * + * @ctx: The context in which the new action will be created. + * @reformat_type: Type of reformat prefixed with MLX5HWS_ACTION_TYP_REFORMAT. + * @num_of_hdrs: Number of provided headers in "hdrs" array. + * @hdrs: Headers array containing header information. + * @log_bulk_size: Number of unique values used with this reformat. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_reformat(struct mlx5hws_context *ctx, + enum mlx5hws_action_type reformat_type, + u8 num_of_hdrs, + struct mlx5hws_action_reformat_header *hdrs, + u32 log_bulk_size, + u32 flags); + +/** + * mlx5hws_action_create_modify_header - Create direct rule modify header action. + * + * @ctx: The context in which the new action will be created. + * @num_of_patterns: Number of provided patterns in "patterns" array. + * @patterns: Patterns array containing pattern information. + * @log_bulk_size: Number of unique values used with this pattern. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_modify_header(struct mlx5hws_context *ctx, + u8 num_of_patterns, + struct mlx5hws_action_mh_pattern *patterns, + u32 log_bulk_size, + u32 flags); + +/** + * mlx5hws_action_create_aso_meter - Create direct rule ASO flow meter action. + * + * @ctx: The context in which the new action will be created. + * @obj_id: ASO object ID. + * @return_reg_c: Copy the ASO object value into this reg_c, + * after a packet hits a rule with this ASO object. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_aso_meter(struct mlx5hws_context *ctx, + u32 obj_id, + u8 return_reg_c, + u32 flags); + +/** + * mlx5hws_action_create_pop_vlan - Create direct rule pop vlan action. + * + * @ctx: The context in which the new action will be created. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_pop_vlan(struct mlx5hws_context *ctx, u32 flags); + +/** + * mlx5hws_action_create_push_vlan - Create direct rule push vlan action. + * + * @ctx: The context in which the new action will be created. + * @flags: Action creation flags (enum mlx5hws_action_flags). + * + * Return: pointer to mlx5hws_action on success NULL otherwise. + */ +struct mlx5hws_action * +mlx5hws_action_create_push_vlan(struct mlx5hws_context *ctx, u32 flags); + +/** + * mlx5hws_action_create_dest_array - Create a dest array action, this action can + * duplicate packets and forward to multiple destinations in the destination list. 
+
+/**
+ * mlx5hws_action_create_dest_array - Create a dest array action.
+ *
+ * This action can duplicate packets and forward to multiple destinations in
+ * the destination list.
+ *
+ * @ctx: The context in which the new action will be created.
+ * @num_dest: The number of dest attributes.
+ * @dests: The destination array. Each contains a destination action and can
+ *	   have additional actions.
+ * @ignore_flow_level: Whether to turn on 'ignore_flow_level' for this dest.
+ * @flow_source: Source port of the traffic for this action.
+ * @flags: Action creation flags (enum mlx5hws_action_flags).
+ *
+ * Return: Pointer to mlx5hws_action on success, NULL otherwise.
+ */
+struct mlx5hws_action *
+mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
+				 size_t num_dest,
+				 struct mlx5hws_action_dest_attr *dests,
+				 bool ignore_flow_level,
+				 u32 flow_source,
+				 u32 flags);
+
+/**
+ * mlx5hws_action_create_insert_header - Create insert header action.
+ *
+ * @ctx: The context in which the new action will be created.
+ * @num_of_hdrs: Number of provided headers in "hdrs" array.
+ * @hdrs: Headers array containing header information.
+ * @log_bulk_size: Number of unique values used with this insert header.
+ * @flags: Action creation flags (enum mlx5hws_action_flags).
+ *
+ * Return: Pointer to mlx5hws_action on success, NULL otherwise.
+ */
+struct mlx5hws_action *
+mlx5hws_action_create_insert_header(struct mlx5hws_context *ctx,
+				    u8 num_of_hdrs,
+				    struct mlx5hws_action_insert_header *hdrs,
+				    u32 log_bulk_size,
+				    u32 flags);
+
+/**
+ * mlx5hws_action_create_remove_header - Create remove header action.
+ *
+ * @ctx: The context in which the new action will be created.
+ * @attr: Attributes that specify the remove header type: either the PRM start
+ *	  anchor and the PRM end anchor, or the PRM start anchor and the
+ *	  remove size in bytes.
+ * @flags: Action creation flags (enum mlx5hws_action_flags).
+ *
+ * Return: Pointer to mlx5hws_action on success, NULL otherwise.
+ */
+struct mlx5hws_action *
+mlx5hws_action_create_remove_header(struct mlx5hws_context *ctx,
+				    struct mlx5hws_action_remove_header_attr *attr,
+				    u32 flags);
+
+/**
+ * mlx5hws_action_create_last - Create direct rule LAST action.
+ *
+ * @ctx: The context in which the new action will be created.
+ * @flags: Action creation flags (enum mlx5hws_action_flags).
+ *
+ * Return: Pointer to mlx5hws_action on success, NULL otherwise.
+ */
+struct mlx5hws_action *
+mlx5hws_action_create_last(struct mlx5hws_context *ctx, u32 flags);
+
+/**
+ * mlx5hws_action_destroy - Destroy direct rule action.
+ *
+ * @action: The action to destroy.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int mlx5hws_action_destroy(struct mlx5hws_action *action);
+
+enum mlx5hws_flow_op_status {
+	MLX5HWS_FLOW_OP_SUCCESS,
+	MLX5HWS_FLOW_OP_ERROR,
+};
+
+struct mlx5hws_flow_op_result {
+	enum mlx5hws_flow_op_status status;
+	void *user_data;
+};
+
+/**
+ * mlx5hws_send_queue_poll - Poll queue for rule creation and deletion completions.
+ *
+ * @ctx: The context to which the queue belongs.
+ * @queue_id: The id of the queue to poll.
+ * @res: Completion array.
+ * @res_nb: Maximum number of results to return.
+ *
+ * Return: Negative number on failure, the number of completions otherwise.
+ */
+int mlx5hws_send_queue_poll(struct mlx5hws_context *ctx,
+			    u16 queue_id,
+			    struct mlx5hws_flow_op_result res[],
+			    u32 res_nb);
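+
+/*
+ * Illustrative sketch only: draining completions after asynchronous rule
+ * operations. "ctx" and "queue_id" are placeholders for a valid context and
+ * queue.
+ *
+ *	struct mlx5hws_flow_op_result res[64];
+ *	int i, n;
+ *
+ *	n = mlx5hws_send_queue_poll(ctx, queue_id, res, ARRAY_SIZE(res));
+ *	if (n < 0)
+ *		return n;
+ *	for (i = 0; i < n; i++) {
+ *		if (res[i].status == MLX5HWS_FLOW_OP_ERROR)
+ *			... handle the failed operation via res[i].user_data ...
+ *	}
+ */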
+
+/**
+ * mlx5hws_send_queue_action - Perform an action on the queue.
+ *
+ * @ctx: The context to which the queue belongs.
+ * @queue_id: The id of the queue to perform the action on.
+ * @actions: Actions to perform on the queue (enum mlx5hws_send_queue_actions).
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int mlx5hws_send_queue_action(struct mlx5hws_context *ctx,
+			      u16 queue_id,
+			      u32 actions);
+
+/**
+ * mlx5hws_debug_dump - Dump HWS info.
+ *
+ * @ctx: The context from which to dump the info.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int mlx5hws_debug_dump(struct mlx5hws_context *ctx);
+
+struct mlx5hws_bwc_matcher;
+struct mlx5hws_bwc_rule;
+
+struct mlx5hws_match_parameters {
+	size_t match_sz;
+	u32 *match_buf; /* Device spec format */
+};
+
+/**
+ * mlx5hws_bwc_matcher_create - Create a new BWC direct rule matcher.
+ *
+ * This function does the following:
+ * - creates match template based on flow items
+ * - creates an empty action template
+ * - creates a usual mlx5hws_matcher with these mt and at, setting
+ *   its size to minimal
+ * Notes:
+ * - table->ctx must have BWC support
+ * - complex rules are not supported
+ *
+ * @table: The table in which the new matcher will be opened
+ * @priority: Priority for this BWC matcher
+ * @match_criteria_enable: Bitmask that defines matching criteria
+ * @mask: Match parameters
+ *
+ * Return: Pointer to mlx5hws_bwc_matcher on success, NULL otherwise.
+ */
+struct mlx5hws_bwc_matcher *
+mlx5hws_bwc_matcher_create(struct mlx5hws_table *table,
+			   u32 priority,
+			   u8 match_criteria_enable,
+			   struct mlx5hws_match_parameters *mask);
+
+/**
+ * mlx5hws_bwc_matcher_destroy - Destroy BWC direct rule matcher.
+ *
+ * @bwc_matcher: Matcher to destroy
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher);
+
+/**
+ * mlx5hws_bwc_rule_create - Create a new BWC rule.
+ *
+ * Unlike the usual rule creation function, this one is blocking: when the
+ * function returns, the rule is written to its place (no need to poll).
+ * This function does the following:
+ * - finds a matching action template based on the provided rule_actions, or
+ *   creates a new action template if a matching one doesn't exist
+ * - updates corresponding BWC matcher stats
+ * - if needed, the function performs rehash:
+ *   - creates a new matcher based on mt, at, new_sz
+ *   - moves all the existing matcher rules to the new matcher
+ *   - removes the old matcher
+ * - inserts the new rule
+ * - polls until completion is received
+ * Notes:
+ * - matcher->tbl->ctx must have BWC support
+ * - separate BWC ctx queues are used
+ *
+ * @bwc_matcher: The BWC matcher in which the new rule will be created.
+ * @params: Match parameters
+ * @flow_source: Flow source for this rule
+ * @rule_actions: Rule actions to be executed on match
+ *
+ * Return: Valid BWC rule handle on success, NULL otherwise.
+ */
+struct mlx5hws_bwc_rule *
+mlx5hws_bwc_rule_create(struct mlx5hws_bwc_matcher *bwc_matcher,
+			struct mlx5hws_match_parameters *params,
+			u32 flow_source,
+			struct mlx5hws_rule_action rule_actions[]);
+
+/**
+ * mlx5hws_bwc_rule_destroy - Destroy BWC direct rule.
+ *
+ * @bwc_rule: Rule to destroy.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int mlx5hws_bwc_rule_destroy(struct mlx5hws_bwc_rule *bwc_rule);
+
+/**
+ * mlx5hws_bwc_rule_action_update - Update actions on an existing BWC rule.
+ *
+ * @bwc_rule: Rule to update
+ * @rule_actions: Rule actions to update with
+ *
+ * Return: Zero on successful update, non-zero otherwise.
+ */
+int mlx5hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
+				   struct mlx5hws_rule_action rule_actions[]);
+
+#endif
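A condensed sketch of the intended BWC call flow declared above, for orientation only: the names "tbl", "params", "flow_source", "rule_actions" and "match_criteria_enable" are placeholders for objects set up elsewhere, error handling is elided, and since mlx5hws_bwc_rule_create() blocks, no completion polling is needed:

	u32 match_buf[MLX5_ST_SZ_DW(fte_match_param)] = {};
	struct mlx5hws_match_parameters mask = {
		.match_sz = sizeof(match_buf),
		.match_buf = match_buf,
	};
	struct mlx5hws_bwc_matcher *matcher;
	struct mlx5hws_bwc_rule *rule;

	/* Caller fills match_buf (device spec format) before this point. */
	matcher = mlx5hws_bwc_matcher_create(tbl, 0 /* priority */,
					     match_criteria_enable, &mask);

	/* Blocking: the rule is in place once this returns. */
	rule = mlx5hws_bwc_rule_create(matcher, &params, flow_source,
				       rule_actions);

	mlx5hws_bwc_rule_destroy(rule);
	mlx5hws_bwc_matcher_destroy(matcher);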
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_action.c
new file mode 100644
index 000000000000..b27bb4106532
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_action.c
@@ -0,0 +1,2604 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#include "mlx5hws_internal.h"
+
+#define MLX5HWS_ACTION_METER_INIT_COLOR_OFFSET 1
+
+/* Header removal size limited to 128B (64 words) */
+#define MLX5HWS_ACTION_REMOVE_HEADER_MAX_SIZE 128
+
+/* This is the longest supported action sequence for FDB table:
+ * DECAP, POP_VLAN, MODIFY, CTR, ASO, PUSH_VLAN, MODIFY, ENCAP, Term.
+ */
+static const u32 action_order_arr[MLX5HWS_TABLE_TYPE_MAX][MLX5HWS_ACTION_TYP_MAX] = {
+	[MLX5HWS_TABLE_TYPE_FDB] = {
+		BIT(MLX5HWS_ACTION_TYP_REMOVE_HEADER) |
+		BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2) |
+		BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2),
+		BIT(MLX5HWS_ACTION_TYP_POP_VLAN),
+		BIT(MLX5HWS_ACTION_TYP_POP_VLAN),
+		BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR),
+		BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN),
+		BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN),
+		BIT(MLX5HWS_ACTION_TYP_INSERT_HEADER) |
+		BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) |
+		BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3),
+		BIT(MLX5HWS_ACTION_TYP_CTR),
+		BIT(MLX5HWS_ACTION_TYP_TAG),
+		BIT(MLX5HWS_ACTION_TYP_ASO_METER),
+		BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR),
+		BIT(MLX5HWS_ACTION_TYP_TBL) |
+		BIT(MLX5HWS_ACTION_TYP_VPORT) |
+		BIT(MLX5HWS_ACTION_TYP_DROP) |
+		BIT(MLX5HWS_ACTION_TYP_SAMPLER) |
+		BIT(MLX5HWS_ACTION_TYP_RANGE) |
+		BIT(MLX5HWS_ACTION_TYP_DEST_ARRAY),
+		BIT(MLX5HWS_ACTION_TYP_LAST),
+	},
+};
+
+static const char * const mlx5hws_action_type_str[] = {
+	[MLX5HWS_ACTION_TYP_LAST] = "LAST",
+	[MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2] = "TNL_L2_TO_L2",
+	[MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2] = "L2_TO_TNL_L2",
+	[MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2] = "TNL_L3_TO_L2",
+	[MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3] = "L2_TO_TNL_L3",
+	[MLX5HWS_ACTION_TYP_DROP] = "DROP",
+	[MLX5HWS_ACTION_TYP_TBL] = "TBL",
+	[MLX5HWS_ACTION_TYP_CTR] = "CTR",
+	[MLX5HWS_ACTION_TYP_TAG] = "TAG",
+	[MLX5HWS_ACTION_TYP_MODIFY_HDR] = "MODIFY_HDR",
+	[MLX5HWS_ACTION_TYP_VPORT] = "VPORT",
+	[MLX5HWS_ACTION_TYP_MISS] = "DEFAULT_MISS",
+	[MLX5HWS_ACTION_TYP_POP_VLAN] = "POP_VLAN",
+	[MLX5HWS_ACTION_TYP_PUSH_VLAN] = "PUSH_VLAN",
+	[MLX5HWS_ACTION_TYP_ASO_METER] = "ASO_METER",
+	[MLX5HWS_ACTION_TYP_DEST_ARRAY] = "DEST_ARRAY",
+	[MLX5HWS_ACTION_TYP_INSERT_HEADER] = "INSERT_HEADER",
+	[MLX5HWS_ACTION_TYP_REMOVE_HEADER] = "REMOVE_HEADER",
+	[MLX5HWS_ACTION_TYP_SAMPLER] = "SAMPLER",
+	[MLX5HWS_ACTION_TYP_RANGE] = "RANGE",
+};
+
+static_assert(ARRAY_SIZE(mlx5hws_action_type_str) == MLX5HWS_ACTION_TYP_MAX,
+	      "Missing mlx5hws_action_type_str");
+
+const char *mlx5hws_action_type_to_str(enum mlx5hws_action_type action_type)
+{
+	return mlx5hws_action_type_str[action_type];
+}
+
+enum mlx5hws_action_type mlx5hws_action_get_type(struct mlx5hws_action *action)
+{
+	return action->type;
+}
+
+static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx,
+					 enum mlx5hws_context_shared_stc_type stc_type,
+					 u8 tbl_type)
+{
+	struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
+	struct mlx5hws_action_shared_stc *shared_stc;
+	int ret;
+
+	mutex_lock(&ctx->ctrl_lock);
+	if (ctx->common_res[tbl_type].shared_stc[stc_type]) {
+		ctx->common_res[tbl_type].shared_stc[stc_type]->refcount++;
+		mutex_unlock(&ctx->ctrl_lock);
+		return 0;
+	}
+
+	shared_stc = kzalloc(sizeof(*shared_stc), GFP_KERNEL);
+	if (!shared_stc) {
+		ret = -ENOMEM;
+		goto unlock_and_out;
+	}
+	switch (stc_type) {
+	case MLX5HWS_CONTEXT_SHARED_STC_DECAP_L3:
+		stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_HEADER_REMOVE;
+		stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_DW5;
+		stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
+		stc_attr.remove_header.decap = 0;
+		stc_attr.remove_header.start_anchor = MLX5_HEADER_ANCHOR_PACKET_START;
+		stc_attr.remove_header.end_anchor = MLX5_HEADER_ANCHOR_IPV6_IPV4;
+		break;
+	case MLX5HWS_CONTEXT_SHARED_STC_DOUBLE_POP:
+		stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_REMOVE_WORDS;
+		stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_DW5;
+		stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_ALWAYS;
+		stc_attr.remove_words.start_anchor = MLX5_HEADER_ANCHOR_FIRST_VLAN_START;
+		stc_attr.remove_words.num_of_words = MLX5HWS_ACTION_HDR_LEN_L2_VLAN;
+		break;
+	default:
+		mlx5hws_err(ctx, "No such stc_type: %d\n", stc_type);
+		ret = -EINVAL;
+		/* Free the already allocated shared_stc to avoid leaking it */
+		goto free_shared_stc;
+	}
+
+	ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type,
+					      &shared_stc->stc_chunk);
+	if (ret) {
+		mlx5hws_err(ctx, "Failed to allocate shared decap l2 STC\n");
+		goto free_shared_stc;
+	}
+
+	ctx->common_res[tbl_type].shared_stc[stc_type] = shared_stc;
+	ctx->common_res[tbl_type].shared_stc[stc_type]->refcount = 1;
+
+	mutex_unlock(&ctx->ctrl_lock);
+
+	return 0;
+
+free_shared_stc:
+	kfree(shared_stc);
+unlock_and_out:
+	mutex_unlock(&ctx->ctrl_lock);
+	return ret;
+}
+
+static int hws_action_get_shared_stc(struct mlx5hws_action *action,
+				     enum mlx5hws_context_shared_stc_type stc_type)
+{
+	struct mlx5hws_context *ctx = action->ctx;
+	int ret;
+
+	if (stc_type >= MLX5HWS_CONTEXT_SHARED_STC_MAX) {
+		pr_warn("HWS: Invalid shared stc_type: %d\n", stc_type);
+		return -EINVAL;
+	}
+
+	if (unlikely(!(action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB))) {
+		pr_warn("HWS: Invalid action->flags: %d\n", action->flags);
+		return -EINVAL;
+	}
+
+	ret = hws_action_get_shared_stc_nic(ctx, stc_type, MLX5HWS_TABLE_TYPE_FDB);
+	if (ret) {
+		mlx5hws_err(ctx,
+			    "Failed to allocate memory for FDB shared STCs (type: %d)\n",
+			    stc_type);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void hws_action_put_shared_stc(struct mlx5hws_action *action,
+				      enum mlx5hws_context_shared_stc_type stc_type)
+{
+	enum mlx5hws_table_type tbl_type = MLX5HWS_TABLE_TYPE_FDB;
+	struct mlx5hws_action_shared_stc *shared_stc;
+	struct mlx5hws_context *ctx = action->ctx;
+
+	if (stc_type >= MLX5HWS_CONTEXT_SHARED_STC_MAX) {
+		pr_warn("HWS: Invalid shared stc_type: %d\n", stc_type);
+		return;
+	}
+
+	mutex_lock(&ctx->ctrl_lock);
+	if (--ctx->common_res[tbl_type].shared_stc[stc_type]->refcount) {
+		mutex_unlock(&ctx->ctrl_lock);
+		return;
+	}
+
+	shared_stc = ctx->common_res[tbl_type].shared_stc[stc_type];
+
+	mlx5hws_action_free_single_stc(ctx, tbl_type, &shared_stc->stc_chunk);
+	kfree(shared_stc);
+	ctx->common_res[tbl_type].shared_stc[stc_type] = NULL;
+	mutex_unlock(&ctx->ctrl_lock);
+}
+
+static void hws_action_print_combo(struct mlx5hws_context *ctx,
+				   enum mlx5hws_action_type *user_actions)
+{
+	mlx5hws_err(ctx, "Invalid action_type sequence");
+	while (*user_actions != MLX5HWS_ACTION_TYP_LAST) {
+		mlx5hws_err(ctx, " %s", mlx5hws_action_type_to_str(*user_actions));
+		user_actions++;
+	}
+	mlx5hws_err(ctx, "\n");
+}
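+
+/*
+ * Illustrative note: given the FDB entries of action_order_arr above, a user
+ * sequence such as
+ *
+ *	{ MLX5HWS_ACTION_TYP_POP_VLAN, MLX5HWS_ACTION_TYP_MODIFY_HDR,
+ *	  MLX5HWS_ACTION_TYP_CTR, MLX5HWS_ACTION_TYP_TBL,
+ *	  MLX5HWS_ACTION_TYP_LAST }
+ *
+ * passes mlx5hws_action_check_combo() below, since each user action matches
+ * a slot at or after the previous match. The same actions with CTR placed
+ * after TBL fail: the order array is exhausted before CTR can be matched.
+ */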
+
+bool mlx5hws_action_check_combo(struct mlx5hws_context *ctx,
+				enum mlx5hws_action_type *user_actions,
+				enum mlx5hws_table_type table_type)
+{
+	const u32 *order_arr = action_order_arr[table_type];
+	u8 order_idx = 0;
+	u8 user_idx = 0;
+	bool valid_combo;
+
+	if (table_type >= MLX5HWS_TABLE_TYPE_MAX) {
+		mlx5hws_err(ctx, "Invalid table_type %d", table_type);
+		return false;
+	}
+
+	while (order_arr[order_idx] != BIT(MLX5HWS_ACTION_TYP_LAST)) {
+		/* User action order validated, move to next user action */
+		if (BIT(user_actions[user_idx]) & order_arr[order_idx])
+			user_idx++;
+
+		/* Iterate to the next supported action in the order */
+		order_idx++;
+	}
+
+	/* Combination is valid if all user actions were processed */
+	valid_combo = user_actions[user_idx] == MLX5HWS_ACTION_TYP_LAST;
+	if (!valid_combo)
+		hws_action_print_combo(ctx, user_actions);
+
+	return valid_combo;
+}
+
+static bool
+hws_action_fixup_stc_attr(struct mlx5hws_context *ctx,
+			  struct mlx5hws_cmd_stc_modify_attr *stc_attr,
+			  struct mlx5hws_cmd_stc_modify_attr *fixup_stc_attr,
+			  enum mlx5hws_table_type table_type,
+			  bool is_mirror)
+{
+	bool use_fixup = false;
+	u32 fw_tbl_type;
+	u32 base_id;
+
+	fw_tbl_type = mlx5hws_table_get_res_fw_ft_type(table_type, is_mirror);
+
+	switch (stc_attr->action_type) {
+	case MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE:
+		if (is_mirror && stc_attr->ste_table.ignore_tx) {
+			fixup_stc_attr->action_type = MLX5_IFC_STC_ACTION_TYPE_DROP;
+			fixup_stc_attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT;
+			fixup_stc_attr->stc_offset = stc_attr->stc_offset;
+			use_fixup = true;
+			break;
+		}
+		if (!is_mirror)
+			base_id = mlx5hws_pool_chunk_get_base_id(stc_attr->ste_table.ste_pool,
+								 &stc_attr->ste_table.ste);
+		else
+			base_id =
+				mlx5hws_pool_chunk_get_base_mirror_id(stc_attr->ste_table.ste_pool,
+								      &stc_attr->ste_table.ste);
+
+		*fixup_stc_attr = *stc_attr;
+		fixup_stc_attr->ste_table.ste_obj_id = base_id;
+		use_fixup = true;
+		break;
+
+	case MLX5_IFC_STC_ACTION_TYPE_TAG:
+		if (fw_tbl_type == FS_FT_FDB_TX) {
+			fixup_stc_attr->action_type = MLX5_IFC_STC_ACTION_TYPE_NOP;
+			fixup_stc_attr->action_offset = MLX5HWS_ACTION_OFFSET_DW5;
+			fixup_stc_attr->stc_offset = stc_attr->stc_offset;
+			use_fixup = true;
+		}
+		break;
+
+	case MLX5_IFC_STC_ACTION_TYPE_ALLOW:
+		if (fw_tbl_type == FS_FT_FDB_TX || fw_tbl_type == FS_FT_FDB_RX) {
+			fixup_stc_attr->action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_VPORT;
+			fixup_stc_attr->action_offset = stc_attr->action_offset;
+			fixup_stc_attr->stc_offset = stc_attr->stc_offset;
+			fixup_stc_attr->vport.esw_owner_vhca_id = ctx->caps->vhca_id;
+			fixup_stc_attr->vport.vport_num = ctx->caps->eswitch_manager_vport_number;
+			fixup_stc_attr->vport.eswitch_owner_vhca_id_valid =
+				ctx->caps->merged_eswitch;
+			use_fixup = true;
+		}
+		break;
+
+	case MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_VPORT:
+		if (stc_attr->vport.vport_num != MLX5_VPORT_UPLINK)
+			break;
+
+		if (fw_tbl_type == FS_FT_FDB_TX || fw_tbl_type == FS_FT_FDB_RX) {
+			/* The FW doesn't allow to go to wire in the TX/RX by JUMP_TO_VPORT */
+			fixup_stc_attr->action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_UPLINK;
+			fixup_stc_attr->action_offset = stc_attr->action_offset;
+			fixup_stc_attr->stc_offset = stc_attr->stc_offset;
+			fixup_stc_attr->vport.vport_num = 0;
+			fixup_stc_attr->vport.esw_owner_vhca_id = stc_attr->vport.esw_owner_vhca_id;
+			fixup_stc_attr->vport.eswitch_owner_vhca_id_valid =
+				stc_attr->vport.eswitch_owner_vhca_id_valid;
+		}
+		use_fixup = true;
+		break;
+
+	default:
+		break;
+	}
+
+	return
use_fixup; +} + +int mlx5hws_action_alloc_single_stc(struct mlx5hws_context *ctx, + struct mlx5hws_cmd_stc_modify_attr *stc_attr, + u32 table_type, + struct mlx5hws_pool_chunk *stc) +__must_hold(&ctx->ctrl_lock) +{ + struct mlx5hws_cmd_stc_modify_attr cleanup_stc_attr = {0}; + struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type]; + struct mlx5hws_cmd_stc_modify_attr fixup_stc_attr = {0}; + bool use_fixup; + u32 obj_0_id; + int ret; + + ret = mlx5hws_pool_chunk_alloc(stc_pool, stc); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate single action STC\n"); + return ret; + } + + stc_attr->stc_offset = stc->offset; + + /* Dynamic reparse not supported, overwrite and use default */ + if (!mlx5hws_context_cap_dynamic_reparse(ctx)) + stc_attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + + obj_0_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + + /* According to table/action limitation change the stc_attr */ + use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, table_type, false); + ret = mlx5hws_cmd_stc_modify(ctx->mdev, obj_0_id, + use_fixup ? &fixup_stc_attr : stc_attr); + if (ret) { + mlx5hws_err(ctx, "Failed to modify STC action_type %d tbl_type %d\n", + stc_attr->action_type, table_type); + goto free_chunk; + } + + /* Modify the FDB peer */ + if (table_type == MLX5HWS_TABLE_TYPE_FDB) { + u32 obj_1_id; + + obj_1_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + + use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, + &fixup_stc_attr, + table_type, true); + ret = mlx5hws_cmd_stc_modify(ctx->mdev, obj_1_id, + use_fixup ? &fixup_stc_attr : stc_attr); + if (ret) { + mlx5hws_err(ctx, + "Failed to modify peer STC action_type %d tbl_type %d\n", + stc_attr->action_type, table_type); + goto clean_obj_0; + } + } + + return 0; + +clean_obj_0: + cleanup_stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_DROP; + cleanup_stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + cleanup_stc_attr.stc_offset = stc->offset; + mlx5hws_cmd_stc_modify(ctx->mdev, obj_0_id, &cleanup_stc_attr); +free_chunk: + mlx5hws_pool_chunk_free(stc_pool, stc); + return ret; +} + +void mlx5hws_action_free_single_stc(struct mlx5hws_context *ctx, + u32 table_type, + struct mlx5hws_pool_chunk *stc) +__must_hold(&ctx->ctrl_lock) +{ + struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type]; + struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; + u32 obj_id; + + /* Modify the STC not to point to an object */ + stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_DROP; + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + stc_attr.stc_offset = stc->offset; + obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); + + if (table_type == MLX5HWS_TABLE_TYPE_FDB) { + obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); + } + + mlx5hws_pool_chunk_free(stc_pool, stc); +} + +static u32 hws_action_get_mh_stc_type(struct mlx5hws_context *ctx, + __be64 pattern) +{ + u8 action_type = MLX5_GET(set_action_in, &pattern, action_type); + + switch (action_type) { + case MLX5_MODIFICATION_TYPE_SET: + return MLX5_IFC_STC_ACTION_TYPE_SET; + case MLX5_MODIFICATION_TYPE_ADD: + return MLX5_IFC_STC_ACTION_TYPE_ADD; + case MLX5_MODIFICATION_TYPE_COPY: + return MLX5_IFC_STC_ACTION_TYPE_COPY; + case MLX5_MODIFICATION_TYPE_ADD_FIELD: + return MLX5_IFC_STC_ACTION_TYPE_ADD_FIELD; + default: + mlx5hws_err(ctx, "Unsupported action type: 0x%x\n", action_type); + return MLX5_IFC_STC_ACTION_TYPE_NOP; + } +} + +static 
void hws_action_fill_stc_attr(struct mlx5hws_action *action, + u32 obj_id, + struct mlx5hws_cmd_stc_modify_attr *attr) +{ + attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + + switch (action->type) { + case MLX5HWS_ACTION_TYP_TAG: + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_TAG; + attr->action_offset = MLX5HWS_ACTION_OFFSET_DW5; + break; + case MLX5HWS_ACTION_TYP_DROP: + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_DROP; + attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT; + break; + case MLX5HWS_ACTION_TYP_MISS: + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_ALLOW; + attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT; + break; + case MLX5HWS_ACTION_TYP_CTR: + attr->id = obj_id; + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_COUNTER; + attr->action_offset = MLX5HWS_ACTION_OFFSET_DW0; + break; + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2: + case MLX5HWS_ACTION_TYP_MODIFY_HDR: + attr->action_offset = MLX5HWS_ACTION_OFFSET_DW6; + attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + if (action->modify_header.require_reparse) + attr->reparse_mode = MLX5_IFC_STC_REPARSE_ALWAYS; + + if (action->modify_header.num_of_actions == 1) { + attr->modify_action.data = action->modify_header.single_action; + attr->action_type = hws_action_get_mh_stc_type(action->ctx, + attr->modify_action.data); + + if (attr->action_type == MLX5_IFC_STC_ACTION_TYPE_ADD || + attr->action_type == MLX5_IFC_STC_ACTION_TYPE_SET) + MLX5_SET(set_action_in, &attr->modify_action.data, data, 0); + } else { + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_ACC_MODIFY_LIST; + attr->modify_header.arg_id = action->modify_header.arg_id; + attr->modify_header.pattern_id = action->modify_header.pat_id; + } + break; + case MLX5HWS_ACTION_TYP_TBL: + case MLX5HWS_ACTION_TYP_DEST_ARRAY: + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_FT; + attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT; + attr->dest_table_id = obj_id; + break; + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2: + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_HEADER_REMOVE; + attr->action_offset = MLX5HWS_ACTION_OFFSET_DW5; + attr->reparse_mode = MLX5_IFC_STC_REPARSE_ALWAYS; + attr->remove_header.decap = 1; + attr->remove_header.start_anchor = MLX5_HEADER_ANCHOR_PACKET_START; + attr->remove_header.end_anchor = MLX5_HEADER_ANCHOR_INNER_MAC; + break; + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2: + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3: + case MLX5HWS_ACTION_TYP_INSERT_HEADER: + attr->reparse_mode = MLX5_IFC_STC_REPARSE_ALWAYS; + if (!action->reformat.require_reparse) + attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_HEADER_INSERT; + attr->action_offset = MLX5HWS_ACTION_OFFSET_DW6; + attr->insert_header.encap = action->reformat.encap; + attr->insert_header.insert_anchor = action->reformat.anchor; + attr->insert_header.arg_id = action->reformat.arg_id; + attr->insert_header.header_size = action->reformat.header_size; + attr->insert_header.insert_offset = action->reformat.offset; + break; + case MLX5HWS_ACTION_TYP_ASO_METER: + attr->action_offset = MLX5HWS_ACTION_OFFSET_DW6; + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_ASO; + attr->aso.aso_type = ASO_OPC_MOD_POLICER; + attr->aso.devx_obj_id = obj_id; + attr->aso.return_reg_id = action->aso.return_reg_id; + break; + case MLX5HWS_ACTION_TYP_VPORT: + attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT; + attr->action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_VPORT; + attr->vport.vport_num = action->vport.vport_num; + attr->vport.esw_owner_vhca_id = 
action->vport.esw_owner_vhca_id;
+		attr->vport.eswitch_owner_vhca_id_valid = action->vport.esw_owner_vhca_id_valid;
+		break;
+	case MLX5HWS_ACTION_TYP_POP_VLAN:
+		attr->action_type = MLX5_IFC_STC_ACTION_TYPE_REMOVE_WORDS;
+		attr->action_offset = MLX5HWS_ACTION_OFFSET_DW5;
+		attr->reparse_mode = MLX5_IFC_STC_REPARSE_ALWAYS;
+		attr->remove_words.start_anchor = MLX5_HEADER_ANCHOR_FIRST_VLAN_START;
+		attr->remove_words.num_of_words = MLX5HWS_ACTION_HDR_LEN_L2_VLAN / 2;
+		break;
+	case MLX5HWS_ACTION_TYP_PUSH_VLAN:
+		attr->action_type = MLX5_IFC_STC_ACTION_TYPE_HEADER_INSERT;
+		attr->action_offset = MLX5HWS_ACTION_OFFSET_DW6;
+		attr->reparse_mode = MLX5_IFC_STC_REPARSE_ALWAYS;
+		attr->insert_header.encap = 0;
+		attr->insert_header.is_inline = 1;
+		attr->insert_header.insert_anchor = MLX5_HEADER_ANCHOR_PACKET_START;
+		attr->insert_header.insert_offset = MLX5HWS_ACTION_HDR_LEN_L2_MACS;
+		attr->insert_header.header_size = MLX5HWS_ACTION_HDR_LEN_L2_VLAN;
+		break;
+	case MLX5HWS_ACTION_TYP_REMOVE_HEADER:
+		attr->action_type = MLX5_IFC_STC_ACTION_TYPE_REMOVE_WORDS;
+		attr->remove_header.decap = 0; /* 0 is the only supported decap mode */
+		attr->remove_words.start_anchor = action->remove_header.anchor;
+		/* the size is already in words */
+		attr->remove_words.num_of_words = action->remove_header.size;
+		attr->action_offset = MLX5HWS_ACTION_OFFSET_DW5;
+		attr->reparse_mode = MLX5_IFC_STC_REPARSE_ALWAYS;
+		break;
+	default:
+		mlx5hws_err(action->ctx, "Invalid action type %d\n", action->type);
+	}
+}
+
+static int
+hws_action_create_stcs(struct mlx5hws_action *action, u32 obj_id)
+{
+	struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
+	struct mlx5hws_context *ctx = action->ctx;
+	int ret;
+
+	hws_action_fill_stc_attr(action, obj_id, &stc_attr);
+
+	/* Block unsupported parallel obj modify over the same base */
+	mutex_lock(&ctx->ctrl_lock);
+
+	/* Allocate STC for FDB */
+	if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB) {
+		ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr,
+						      MLX5HWS_TABLE_TYPE_FDB,
+						      &action->stc[MLX5HWS_TABLE_TYPE_FDB]);
+		if (ret)
+			goto out_err;
+	}
+
+	mutex_unlock(&ctx->ctrl_lock);
+
+	return 0;
+
+out_err:
+	mutex_unlock(&ctx->ctrl_lock);
+	return ret;
+}
+
+static void
+hws_action_destroy_stcs(struct mlx5hws_action *action)
+{
+	struct mlx5hws_context *ctx = action->ctx;
+
+	/* Block unsupported parallel obj modify over the same base */
+	mutex_lock(&ctx->ctrl_lock);
+
+	if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB)
+		mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB,
+					       &action->stc[MLX5HWS_TABLE_TYPE_FDB]);
+
+	mutex_unlock(&ctx->ctrl_lock);
+}
+
+static bool hws_action_is_flag_hws_fdb(u32 flags)
+{
+	return flags & MLX5HWS_ACTION_FLAG_HWS_FDB;
+}
+
+static bool
+hws_action_validate_hws_action(struct mlx5hws_context *ctx, u32 flags)
+{
+	if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT)) {
+		mlx5hws_err(ctx, "Cannot create HWS action since HWS is not supported\n");
+		return false;
+	}
+
+	if ((flags & MLX5HWS_ACTION_FLAG_HWS_FDB) && !ctx->caps->eswitch_manager) {
+		mlx5hws_err(ctx, "Cannot create HWS action for FDB for non-eswitch-manager\n");
+		return false;
+	}
+
+	return true;
+}
+
+static struct mlx5hws_action *
+hws_action_create_generic_bulk(struct mlx5hws_context *ctx,
+			       u32 flags,
+			       enum mlx5hws_action_type action_type,
+			       u8 bulk_sz)
+{
+	struct mlx5hws_action *action;
+	int i;
+
+	if (!hws_action_is_flag_hws_fdb(flags)) {
+		mlx5hws_err(ctx,
+			    "Action (type: %d) flags must specify only HWS FDB\n", action_type);
+		return NULL;
+	}
+
+	if
(!hws_action_validate_hws_action(ctx, flags)) + return NULL; + + action = kcalloc(bulk_sz, sizeof(*action), GFP_KERNEL); + if (!action) + return NULL; + + for (i = 0; i < bulk_sz; i++) { + action[i].ctx = ctx; + action[i].flags = flags; + action[i].type = action_type; + } + + return action; +} + +static struct mlx5hws_action * +hws_action_create_generic(struct mlx5hws_context *ctx, + u32 flags, + enum mlx5hws_action_type action_type) +{ + return hws_action_create_generic_bulk(ctx, flags, action_type, 1); +} + +struct mlx5hws_action * +mlx5hws_action_create_dest_table_num(struct mlx5hws_context *ctx, + u32 table_id, + u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_TBL); + if (!action) + return NULL; + + ret = hws_action_create_stcs(action, table_id); + if (ret) + goto free_action; + + action->dest_obj.obj_id = table_id; + + return action; + +free_action: + kfree(action); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_dest_table(struct mlx5hws_context *ctx, + struct mlx5hws_table *tbl, + u32 flags) +{ + return mlx5hws_action_create_dest_table_num(ctx, tbl->ft_id, flags); +} + +struct mlx5hws_action * +mlx5hws_action_create_dest_drop(struct mlx5hws_context *ctx, u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_DROP); + if (!action) + return NULL; + + ret = hws_action_create_stcs(action, 0); + if (ret) + goto free_action; + + return action; + +free_action: + kfree(action); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_default_miss(struct mlx5hws_context *ctx, u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_MISS); + if (!action) + return NULL; + + ret = hws_action_create_stcs(action, 0); + if (ret) + goto free_action; + + return action; + +free_action: + kfree(action); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_tag(struct mlx5hws_context *ctx, u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_TAG); + if (!action) + return NULL; + + ret = hws_action_create_stcs(action, 0); + if (ret) + goto free_action; + + return action; + +free_action: + kfree(action); + return NULL; +} + +static struct mlx5hws_action * +hws_action_create_aso(struct mlx5hws_context *ctx, + enum mlx5hws_action_type action_type, + u32 obj_id, + u8 return_reg_id, + u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, action_type); + if (!action) + return NULL; + + action->aso.obj_id = obj_id; + action->aso.return_reg_id = return_reg_id; + + ret = hws_action_create_stcs(action, obj_id); + if (ret) + goto free_action; + + return action; + +free_action: + kfree(action); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_aso_meter(struct mlx5hws_context *ctx, + u32 obj_id, + u8 return_reg_id, + u32 flags) +{ + return hws_action_create_aso(ctx, MLX5HWS_ACTION_TYP_ASO_METER, + obj_id, return_reg_id, flags); +} + +struct mlx5hws_action * +mlx5hws_action_create_counter(struct mlx5hws_context *ctx, + u32 obj_id, + u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_CTR); + if (!action) + return NULL; + + ret = hws_action_create_stcs(action, obj_id); + if (ret) + goto free_action; + + return action; + +free_action: 
+ kfree(action); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_dest_vport(struct mlx5hws_context *ctx, + u16 vport_num, + bool vhca_id_valid, + u16 vhca_id, + u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + if (!(flags & MLX5HWS_ACTION_FLAG_HWS_FDB)) { + mlx5hws_err(ctx, "Vport action is supported for FDB only\n"); + return NULL; + } + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_VPORT); + if (!action) + return NULL; + + if (!ctx->caps->merged_eswitch && vhca_id_valid && vhca_id != ctx->caps->vhca_id) { + mlx5hws_err(ctx, "Non merged eswitch cannot send to other vhca\n"); + goto free_action; + } + + action->vport.vport_num = vport_num; + action->vport.esw_owner_vhca_id_valid = vhca_id_valid; + + if (vhca_id_valid) + action->vport.esw_owner_vhca_id = vhca_id; + + ret = hws_action_create_stcs(action, 0); + if (ret) { + mlx5hws_err(ctx, "Failed creating stc for vport %d\n", vport_num); + goto free_action; + } + + return action; + +free_action: + kfree(action); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_push_vlan(struct mlx5hws_context *ctx, u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_PUSH_VLAN); + if (!action) + return NULL; + + ret = hws_action_create_stcs(action, 0); + if (ret) { + mlx5hws_err(ctx, "Failed creating stc for push vlan\n"); + goto free_action; + } + + return action; + +free_action: + kfree(action); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_pop_vlan(struct mlx5hws_context *ctx, u32 flags) +{ + struct mlx5hws_action *action; + int ret; + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_POP_VLAN); + if (!action) + return NULL; + + ret = hws_action_get_shared_stc(action, MLX5HWS_CONTEXT_SHARED_STC_DOUBLE_POP); + if (ret) { + mlx5hws_err(ctx, "Failed to create remove stc for reformat\n"); + goto free_action; + } + + ret = hws_action_create_stcs(action, 0); + if (ret) { + mlx5hws_err(ctx, "Failed creating stc for pop vlan\n"); + goto free_shared; + } + + return action; + +free_shared: + hws_action_put_shared_stc(action, MLX5HWS_CONTEXT_SHARED_STC_DOUBLE_POP); +free_action: + kfree(action); + return NULL; +} + +static int +hws_action_handle_insert_with_ptr(struct mlx5hws_action *action, + u8 num_of_hdrs, + struct mlx5hws_action_reformat_header *hdrs, + u32 log_bulk_sz) +{ + size_t max_sz = 0; + u32 arg_id; + int ret, i; + + for (i = 0; i < num_of_hdrs; i++) { + if (hdrs[i].sz % W_SIZE != 0) { + mlx5hws_err(action->ctx, + "Header data size should be in WORD granularity\n"); + return -EINVAL; + } + max_sz = max(hdrs[i].sz, max_sz); + } + + /* Allocate single shared arg object for all headers */ + ret = mlx5hws_arg_create(action->ctx, + hdrs->data, + max_sz, + log_bulk_sz, + action->flags & MLX5HWS_ACTION_FLAG_SHARED, + &arg_id); + if (ret) + return ret; + + for (i = 0; i < num_of_hdrs; i++) { + action[i].reformat.arg_id = arg_id; + action[i].reformat.header_size = hdrs[i].sz; + action[i].reformat.num_of_hdrs = num_of_hdrs; + action[i].reformat.max_hdr_sz = max_sz; + action[i].reformat.require_reparse = true; + + if (action[i].type == MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2 || + action[i].type == MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3) { + action[i].reformat.anchor = MLX5_HEADER_ANCHOR_PACKET_START; + action[i].reformat.offset = 0; + action[i].reformat.encap = 1; + } + + ret = hws_action_create_stcs(&action[i], 0); + if (ret) { + mlx5hws_err(action->ctx, "Failed to create 
stc for reformat\n");
+			goto free_stc;
+		}
+	}
+
+	return 0;
+
+free_stc:
+	while (i--)
+		hws_action_destroy_stcs(&action[i]);
+
+	mlx5hws_arg_destroy(action->ctx, arg_id);
+	return ret;
+}
+
+static int
+hws_action_handle_l2_to_tunnel_l3(struct mlx5hws_action *action,
+				  u8 num_of_hdrs,
+				  struct mlx5hws_action_reformat_header *hdrs,
+				  u32 log_bulk_sz)
+{
+	int ret;
+
+	/* The action is remove-l2-header + insert-l3-header */
+	ret = hws_action_get_shared_stc(action, MLX5HWS_CONTEXT_SHARED_STC_DECAP_L3);
+	if (ret) {
+		mlx5hws_err(action->ctx, "Failed to create remove stc for reformat\n");
+		return ret;
+	}
+
+	/* Reuse the insert with pointer for the L2L3 header */
+	ret = hws_action_handle_insert_with_ptr(action,
+						num_of_hdrs,
+						hdrs,
+						log_bulk_sz);
+	if (ret)
+		goto put_shared_stc;
+
+	return 0;
+
+put_shared_stc:
+	hws_action_put_shared_stc(action, MLX5HWS_CONTEXT_SHARED_STC_DECAP_L3);
+	return ret;
+}
+
+static void hws_action_prepare_decap_l3_actions(size_t data_sz,
+						u8 *mh_data,
+						int *num_of_actions)
+{
+	int actions;
+	u32 i;
+
+	/* Remove L2L3 outer headers */
+	MLX5_SET(stc_ste_param_remove, mh_data, action_type,
+		 MLX5_MODIFICATION_TYPE_REMOVE);
+	MLX5_SET(stc_ste_param_remove, mh_data, decap, 0x1);
+	MLX5_SET(stc_ste_param_remove, mh_data, remove_start_anchor,
+		 MLX5_HEADER_ANCHOR_PACKET_START);
+	MLX5_SET(stc_ste_param_remove, mh_data, remove_end_anchor,
+		 MLX5_HEADER_ANCHOR_INNER_IPV6_IPV4);
+	mh_data += MLX5HWS_ACTION_DOUBLE_SIZE; /* Assume every action is 2 dw */
+	actions = 1;
+
+	/* Add the new header using inline actions, 4 bytes at a time: the
+	 * header is added in reverse order, at the beginning of the packet,
+	 * to avoid incorrect parsing by the HW. Since the header is 14B or
+	 * 18B, an extra two bytes are padded and later removed.
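+	 *
+	 * Worked example (illustrative): a 14B header takes 14 / 4 + 1 = 4
+	 * inline inserts of 4B each (16B written); the final remove-words
+	 * action strips one word (2B), leaving the original 14B. An 18B
+	 * header takes 5 inserts (20B), trimmed by the same 2B.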
+ */ + for (i = 0; i < data_sz / MLX5HWS_ACTION_INLINE_DATA_SIZE + 1; i++) { + MLX5_SET(stc_ste_param_insert, mh_data, action_type, + MLX5_MODIFICATION_TYPE_INSERT); + MLX5_SET(stc_ste_param_insert, mh_data, inline_data, 0x1); + MLX5_SET(stc_ste_param_insert, mh_data, insert_anchor, + MLX5_HEADER_ANCHOR_PACKET_START); + MLX5_SET(stc_ste_param_insert, mh_data, insert_size, 2); + mh_data += MLX5HWS_ACTION_DOUBLE_SIZE; + actions++; + } + + /* Remove first 2 extra bytes */ + MLX5_SET(stc_ste_param_remove_words, mh_data, action_type, + MLX5_MODIFICATION_TYPE_REMOVE_WORDS); + MLX5_SET(stc_ste_param_remove_words, mh_data, remove_start_anchor, + MLX5_HEADER_ANCHOR_PACKET_START); + /* The hardware expects here size in words (2 bytes) */ + MLX5_SET(stc_ste_param_remove_words, mh_data, remove_size, 1); + actions++; + + *num_of_actions = actions; +} + +static int +hws_action_handle_tunnel_l3_to_l2(struct mlx5hws_action *action, + u8 num_of_hdrs, + struct mlx5hws_action_reformat_header *hdrs, + u32 log_bulk_sz) +{ + u8 mh_data[MLX5HWS_ACTION_REFORMAT_DATA_SIZE] = {0}; + struct mlx5hws_context *ctx = action->ctx; + u32 arg_id, pat_id; + int num_of_actions; + int mh_data_size; + int ret, i; + + for (i = 0; i < num_of_hdrs; i++) { + if (hdrs[i].sz != MLX5HWS_ACTION_HDR_LEN_L2 && + hdrs[i].sz != MLX5HWS_ACTION_HDR_LEN_L2_W_VLAN) { + mlx5hws_err(ctx, "Data size is not supported for decap-l3\n"); + return -EINVAL; + } + } + + /* Create a full modify header action list in case shared */ + hws_action_prepare_decap_l3_actions(hdrs->sz, mh_data, &num_of_actions); + if (action->flags & MLX5HWS_ACTION_FLAG_SHARED) + mlx5hws_action_prepare_decap_l3_data(hdrs->data, mh_data, num_of_actions); + + /* All DecapL3 cases require the same max arg size */ + ret = mlx5hws_arg_create_modify_header_arg(ctx, + (__be64 *)mh_data, + num_of_actions, + log_bulk_sz, + action->flags & MLX5HWS_ACTION_FLAG_SHARED, + &arg_id); + if (ret) + return ret; + + for (i = 0; i < num_of_hdrs; i++) { + memset(mh_data, 0, MLX5HWS_ACTION_REFORMAT_DATA_SIZE); + hws_action_prepare_decap_l3_actions(hdrs[i].sz, mh_data, &num_of_actions); + mh_data_size = num_of_actions * MLX5HWS_MODIFY_ACTION_SIZE; + + ret = mlx5hws_pat_get_pattern(ctx, (__be64 *)mh_data, mh_data_size, &pat_id); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate pattern for DecapL3\n"); + goto free_stc_and_pat; + } + + action[i].modify_header.max_num_of_actions = num_of_actions; + action[i].modify_header.num_of_actions = num_of_actions; + action[i].modify_header.num_of_patterns = num_of_hdrs; + action[i].modify_header.arg_id = arg_id; + action[i].modify_header.pat_id = pat_id; + action[i].modify_header.require_reparse = + mlx5hws_pat_require_reparse((__be64 *)mh_data, num_of_actions); + + ret = hws_action_create_stcs(&action[i], 0); + if (ret) { + mlx5hws_pat_put_pattern(ctx, pat_id); + goto free_stc_and_pat; + } + } + + return 0; + +free_stc_and_pat: + while (i--) { + hws_action_destroy_stcs(&action[i]); + mlx5hws_pat_put_pattern(ctx, action[i].modify_header.pat_id); + } + + mlx5hws_arg_destroy(action->ctx, arg_id); + return ret; +} + +static int +hws_action_create_reformat_hws(struct mlx5hws_action *action, + u8 num_of_hdrs, + struct mlx5hws_action_reformat_header *hdrs, + u32 bulk_size) +{ + int ret; + + switch (action->type) { + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2: + ret = hws_action_create_stcs(action, 0); + break; + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2: + ret = hws_action_handle_insert_with_ptr(action, num_of_hdrs, hdrs, bulk_size); + break; + case 
MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3:
+		ret = hws_action_handle_l2_to_tunnel_l3(action, num_of_hdrs, hdrs, bulk_size);
+		break;
+	case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2:
+		ret = hws_action_handle_tunnel_l3_to_l2(action, num_of_hdrs, hdrs, bulk_size);
+		break;
+	default:
+		mlx5hws_err(action->ctx, "Invalid HWS reformat action type\n");
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+struct mlx5hws_action *
+mlx5hws_action_create_reformat(struct mlx5hws_context *ctx,
+			       enum mlx5hws_action_type reformat_type,
+			       u8 num_of_hdrs,
+			       struct mlx5hws_action_reformat_header *hdrs,
+			       u32 log_bulk_size,
+			       u32 flags)
+{
+	struct mlx5hws_action *action;
+	int ret;
+
+	if (!num_of_hdrs) {
+		mlx5hws_err(ctx, "Reformat num_of_hdrs cannot be zero\n");
+		return NULL;
+	}
+
+	action = hws_action_create_generic_bulk(ctx, flags, reformat_type, num_of_hdrs);
+	if (!action)
+		return NULL;
+
+	if ((flags & MLX5HWS_ACTION_FLAG_SHARED) && (log_bulk_size || num_of_hdrs > 1)) {
+		mlx5hws_err(ctx, "Reformat flags don't fit HWS (flags: 0x%x)\n", flags);
+		goto free_action;
+	}
+
+	ret = hws_action_create_reformat_hws(action, num_of_hdrs, hdrs, log_bulk_size);
+	if (ret) {
+		mlx5hws_err(ctx, "Failed to create HWS reformat action\n");
+		goto free_action;
+	}
+
+	return action;
+
+free_action:
+	kfree(action);
+	return NULL;
+}
+
+static int
+hws_action_create_modify_header_hws(struct mlx5hws_action *action,
+				    u8 num_of_patterns,
+				    struct mlx5hws_action_mh_pattern *pattern,
+				    u32 log_bulk_size)
+{
+	struct mlx5hws_context *ctx = action->ctx;
+	u16 num_actions, max_mh_actions = 0;
+	int i, ret, size_in_bytes;
+	u32 pat_id, arg_id = 0;
+	__be64 *new_pattern;
+	size_t pat_max_sz;
+
+	pat_max_sz = MLX5HWS_ARG_CHUNK_SIZE_MAX * MLX5HWS_ARG_DATA_SIZE;
+	size_in_bytes = pat_max_sz * sizeof(__be64);
+	new_pattern = kcalloc(num_of_patterns, size_in_bytes, GFP_KERNEL);
+	if (!new_pattern)
+		return -ENOMEM;
+
+	/* Calculate maximum number of mh actions for shared arg allocation */
+	for (i = 0; i < num_of_patterns; i++) {
+		size_t new_num_actions;
+		size_t cur_num_actions;
+		u32 nope_location;
+
+		cur_num_actions = pattern[i].sz / MLX5HWS_MODIFY_ACTION_SIZE;
+
+		mlx5hws_pat_calc_nope(pattern[i].data, cur_num_actions,
+				      pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE,
+				      &new_num_actions, &nope_location,
+				      &new_pattern[i * pat_max_sz]);
+
+		action[i].modify_header.nope_locations = nope_location;
+		action[i].modify_header.num_of_actions = new_num_actions;
+
+		max_mh_actions = max(max_mh_actions, new_num_actions);
+	}
+
+	if (mlx5hws_arg_get_arg_log_size(max_mh_actions) >= MLX5HWS_ARG_CHUNK_SIZE_MAX) {
+		mlx5hws_err(ctx, "Num of actions (%d) bigger than allowed\n",
+			    max_mh_actions);
+		ret = -EINVAL;
+		goto free_new_pat;
+	}
+
+	/* Allocate single shared arg for all patterns based on the max size */
+	if (max_mh_actions > 1) {
+		ret = mlx5hws_arg_create_modify_header_arg(ctx,
+							   pattern->data,
+							   max_mh_actions,
+							   log_bulk_size,
+							   action->flags &
+							   MLX5HWS_ACTION_FLAG_SHARED,
+							   &arg_id);
+		if (ret)
+			goto free_new_pat;
+	}
+
+	for (i = 0; i < num_of_patterns; i++) {
+		if (!mlx5hws_pat_verify_actions(ctx, pattern[i].data, pattern[i].sz)) {
+			mlx5hws_err(ctx, "Failed to verify pattern modify actions\n");
+			ret = -EINVAL;
+			goto free_stc_and_pat;
+		}
+		num_actions = pattern[i].sz / MLX5HWS_MODIFY_ACTION_SIZE;
+		action[i].modify_header.num_of_patterns = num_of_patterns;
+		action[i].modify_header.max_num_of_actions = max_mh_actions;
+
+		action[i].modify_header.require_reparse =
+			mlx5hws_pat_require_reparse(pattern[i].data, num_actions);
+
+		if (num_actions == 1) {
+			pat_id = 0;
+			/* Optimize single modify action to be used inline */
+			action[i].modify_header.single_action = pattern[i].data[0];
+			action[i].modify_header.single_action_type =
+				MLX5_GET(set_action_in, pattern[i].data, action_type);
+		} else {
+			/* Multiple modify actions require a pattern */
+			if (unlikely(action[i].modify_header.nope_locations)) {
+				size_t pattern_sz;
+
+				pattern_sz = action[i].modify_header.num_of_actions *
+					     MLX5HWS_MODIFY_ACTION_SIZE;
+				ret = mlx5hws_pat_get_pattern(ctx,
+							      &new_pattern[i * pat_max_sz],
+							      pattern_sz, &pat_id);
+			} else {
+				ret = mlx5hws_pat_get_pattern(ctx,
+							      pattern[i].data,
+							      pattern[i].sz,
+							      &pat_id);
+			}
+			if (ret) {
+				mlx5hws_err(ctx,
+					    "Failed to allocate pattern for modify header\n");
+				goto free_stc_and_pat;
+			}
+
+			action[i].modify_header.arg_id = arg_id;
+			action[i].modify_header.pat_id = pat_id;
+		}
+		/* Allocate STC for each action representing a header */
+		ret = hws_action_create_stcs(&action[i], 0);
+		if (ret) {
+			if (pat_id)
+				mlx5hws_pat_put_pattern(ctx, pat_id);
+			goto free_stc_and_pat;
+		}
+	}
+
+	kfree(new_pattern);
+	return 0;
+
+free_stc_and_pat:
+	while (i--) {
+		hws_action_destroy_stcs(&action[i]);
+		if (action[i].modify_header.pat_id)
+			mlx5hws_pat_put_pattern(ctx, action[i].modify_header.pat_id);
+	}
+
+	if (arg_id)
+		mlx5hws_arg_destroy(ctx, arg_id);
+free_new_pat:
+	kfree(new_pattern);
+	return ret;
+}
+
+struct mlx5hws_action *
+mlx5hws_action_create_modify_header(struct mlx5hws_context *ctx,
+				    u8 num_of_patterns,
+				    struct mlx5hws_action_mh_pattern *patterns,
+				    u32 log_bulk_size,
+				    u32 flags)
+{
+	struct mlx5hws_action *action;
+	int ret;
+
+	if (!num_of_patterns) {
+		mlx5hws_err(ctx, "Invalid number of patterns\n");
+		return NULL;
+	}
+	action = hws_action_create_generic_bulk(ctx, flags,
+						MLX5HWS_ACTION_TYP_MODIFY_HDR,
+						num_of_patterns);
+	if (!action)
+		return NULL;
+
+	if ((flags & MLX5HWS_ACTION_FLAG_SHARED) && (log_bulk_size || num_of_patterns > 1)) {
+		mlx5hws_err(ctx, "Action cannot be shared with requested pattern or size\n");
+		goto free_action;
+	}
+
+	ret = hws_action_create_modify_header_hws(action,
+						  num_of_patterns,
+						  patterns,
+						  log_bulk_size);
+	if (ret)
+		goto free_action;
+
+	return action;
+
+free_action:
+	kfree(action);
+	return NULL;
+}
+
+struct mlx5hws_action *
+mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
+				 size_t num_dest,
+				 struct mlx5hws_action_dest_attr *dests,
+				 bool ignore_flow_level,
+				 u32 flow_source,
+				 u32 flags)
+{
+	struct mlx5hws_cmd_set_fte_dest *dest_list = NULL;
+	struct mlx5hws_cmd_ft_create_attr ft_attr = {0};
+	struct mlx5hws_cmd_set_fte_attr fte_attr = {0};
+	struct mlx5hws_cmd_forward_tbl *fw_island;
+	struct mlx5hws_action *action;
+	u32 i;
+	int ret;
+
+	if (num_dest <= 1) {
+		mlx5hws_err(ctx, "Action must have multiple dests\n");
+		return NULL;
+	}
+
+	if (flags == (MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED)) {
+		ft_attr.type = FS_FT_FDB;
+		ft_attr.level = ctx->caps->fdb_ft.max_level - 1;
+	} else {
+		mlx5hws_err(ctx, "Action flags not supported\n");
+		return NULL;
+	}
+
+	dest_list = kcalloc(num_dest, sizeof(*dest_list), GFP_KERNEL);
+	if (!dest_list)
+		return NULL;
+
+	for (i = 0; i < num_dest; i++) {
+		enum mlx5hws_action_type action_type = dests[i].dest->type;
+		struct mlx5hws_action *reformat_action = dests[i].reformat;
+
+		switch (action_type) {
+		case MLX5HWS_ACTION_TYP_TBL:
+			dest_list[i].destination_type =
+				MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+			dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id;
+			fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			fte_attr.ignore_flow_level = ignore_flow_level;
+			/* ToDo: In SW steering we have a handling of 'go to WIRE'
+			 * destination here by upper layer setting 'is_wire_ft' flag
+			 * if the destination is wire.
+			 * This is because uplink should be last dest in the list.
+			 */
+			break;
+		case MLX5HWS_ACTION_TYP_VPORT:
+			dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+			dest_list[i].destination_id = dests[i].dest->vport.vport_num;
+			fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			if (ctx->caps->merged_eswitch) {
+				dest_list[i].ext_flags |=
+					MLX5HWS_CMD_EXT_DEST_ESW_OWNER_VHCA_ID;
+				dest_list[i].esw_owner_vhca_id =
+					dests[i].dest->vport.esw_owner_vhca_id;
+			}
+			break;
+		default:
+			mlx5hws_err(ctx, "Unsupported action in dest_array\n");
+			goto free_dest_list;
+		}
+
+		if (reformat_action) {
+			mlx5hws_err(ctx, "dest_array with reformat action - unsupported\n");
+			goto free_dest_list;
+		}
+	}
+
+	fte_attr.dests_num = num_dest;
+	fte_attr.dests = dest_list;
+
+	fw_island = mlx5hws_cmd_forward_tbl_create(ctx->mdev, &ft_attr, &fte_attr);
+	if (!fw_island)
+		goto free_dest_list;
+
+	action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_DEST_ARRAY);
+	if (!action)
+		goto destroy_fw_island;
+
+	ret = hws_action_create_stcs(action, fw_island->ft_id);
+	if (ret)
+		goto free_action;
+
+	action->dest_array.fw_island = fw_island;
+	action->dest_array.num_dest = num_dest;
+	action->dest_array.dest_list = dest_list;
+
+	return action;
+
+free_action:
+	kfree(action);
+destroy_fw_island:
+	mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, fw_island);
+free_dest_list:
+	for (i = 0; i < num_dest; i++) {
+		if (dest_list[i].ext_reformat_id)
+			mlx5hws_cmd_packet_reformat_destroy(ctx->mdev,
+							    dest_list[i].ext_reformat_id);
+	}
+	kfree(dest_list);
+	return NULL;
+}
+
+struct mlx5hws_action *
+mlx5hws_action_create_insert_header(struct mlx5hws_context *ctx,
+				    u8 num_of_hdrs,
+				    struct mlx5hws_action_insert_header *hdrs,
+				    u32 log_bulk_size,
+				    u32 flags)
+{
+	struct mlx5hws_action_reformat_header *reformat_hdrs;
+	struct mlx5hws_action *action;
+	int ret;
+	int i;
+
+	/* Use a bulk allocation: one mlx5hws_action entry per header, since
+	 * the loop below initializes action[i] for each provided header.
+	 */
+	action = hws_action_create_generic_bulk(ctx, flags,
+						MLX5HWS_ACTION_TYP_INSERT_HEADER,
+						num_of_hdrs);
+	if (!action)
+		return NULL;
+
+	reformat_hdrs = kcalloc(num_of_hdrs, sizeof(*reformat_hdrs), GFP_KERNEL);
+	if (!reformat_hdrs)
+		goto free_action;
+
+	for (i = 0; i < num_of_hdrs; i++) {
+		if (hdrs[i].offset % W_SIZE != 0) {
+			mlx5hws_err(ctx, "Header offset should be in WORD granularity\n");
+			goto free_reformat_hdrs;
+		}
+
+		action[i].reformat.anchor = hdrs[i].anchor;
+		action[i].reformat.encap = hdrs[i].encap;
+		action[i].reformat.offset = hdrs[i].offset;
+
+		reformat_hdrs[i].sz = hdrs[i].hdr.sz;
+		reformat_hdrs[i].data = hdrs[i].hdr.data;
+	}
+
+	ret = hws_action_handle_insert_with_ptr(action, num_of_hdrs,
+						reformat_hdrs, log_bulk_size);
+	if (ret) {
+		mlx5hws_err(ctx, "Failed to create HWS reformat action\n");
+		goto free_reformat_hdrs;
+	}
+
+	kfree(reformat_hdrs);
+
+	return action;
+
+free_reformat_hdrs:
+	kfree(reformat_hdrs);
+free_action:
+	kfree(action);
+	return NULL;
+}
+
+struct mlx5hws_action *
+mlx5hws_action_create_remove_header(struct mlx5hws_context *ctx,
+				    struct mlx5hws_action_remove_header_attr *attr,
+				    u32 flags)
+{
+	struct mlx5hws_action *action;
+
+	action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_REMOVE_HEADER);
+	if (!action)
+		return NULL;
+
+	/* Support only remove-anchor-with-size mode */
+	if (attr->size
% W_SIZE != 0) { + mlx5hws_err(ctx, + "Invalid size, HW supports header remove in WORD granularity\n"); + goto free_action; + } + + if (attr->size > MLX5HWS_ACTION_REMOVE_HEADER_MAX_SIZE) { + mlx5hws_err(ctx, "Header removal size limited to %u bytes\n", + MLX5HWS_ACTION_REMOVE_HEADER_MAX_SIZE); + goto free_action; + } + + action->remove_header.anchor = attr->anchor; + action->remove_header.size = attr->size / W_SIZE; + + if (hws_action_create_stcs(action, 0)) + goto free_action; + + return action; + +free_action: + kfree(action); + return NULL; +} + +static struct mlx5hws_definer * +hws_action_create_dest_match_range_definer(struct mlx5hws_context *ctx) +{ + struct mlx5hws_definer *definer; + __be32 *tag; + int ret; + + definer = kzalloc(sizeof(*definer), GFP_KERNEL); + if (!definer) + return NULL; + + definer->dw_selector[0] = MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN / 4; + /* Set DW0 tag mask */ + tag = (__force __be32 *)definer->mask.jumbo; + tag[MLX5HWS_RULE_JUMBO_MATCH_TAG_OFFSET_DW0] = htonl(0xffffUL << 16); + + mutex_lock(&ctx->ctrl_lock); + + ret = mlx5hws_definer_get_obj(ctx, definer); + if (ret < 0) { + mutex_unlock(&ctx->ctrl_lock); + kfree(definer); + return NULL; + } + + mutex_unlock(&ctx->ctrl_lock); + definer->obj_id = ret; + + return definer; +} + +static struct mlx5hws_matcher_action_ste * +hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, + struct mlx5hws_definer *definer, + u32 miss_ft_id) +{ + struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; + struct mlx5hws_action_default_stc *default_stc; + struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_pool_attr pool_attr = {0}; + struct mlx5hws_pool *ste_pool, *stc_pool; + struct mlx5hws_pool_chunk *ste; + u32 *rtc_0_id, *rtc_1_id; + u32 obj_id; + int ret; + + /* Check if STE range is supported */ + if (!IS_BIT_SET(ctx->caps->supp_ste_format_gen_wqe, MLX5_IFC_RTC_STE_FORMAT_RANGE)) { + mlx5hws_err(ctx, "Range STE format not supported\n"); + return NULL; + } + + table_ste = kzalloc(sizeof(*table_ste), GFP_KERNEL); + if (!table_ste) + return NULL; + + mutex_lock(&ctx->ctrl_lock); + + pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; + pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; + pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; + pool_attr.alloc_log_sz = 1; + table_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!table_ste->pool) { + mlx5hws_err(ctx, "Failed to allocate memory ste pool\n"); + goto free_ste; + } + + /* Allocate RTC */ + rtc_0_id = &table_ste->rtc_0_id; + rtc_1_id = &table_ste->rtc_1_id; + ste_pool = table_ste->pool; + ste = &table_ste->ste; + ste->order = 1; + + rtc_attr.log_size = 0; + rtc_attr.log_depth = 0; + rtc_attr.miss_ft_id = miss_ft_id; + rtc_attr.num_hash_definer = 1; + rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; + rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; + rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; + rtc_attr.fw_gen_wqe = true; + rtc_attr.is_scnd_range = true; + + obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + + rtc_attr.pd = ctx->pd_num; + rtc_attr.ste_base = obj_id; + rtc_attr.ste_offset = ste->offset; + rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); + rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false); + + /* STC is a single resource (obj_id), use any STC for the ID */ + stc_pool = ctx->stc_pool[MLX5HWS_TABLE_TYPE_FDB]; + default_stc = ctx->common_res[MLX5HWS_TABLE_TYPE_FDB].default_stc; + obj_id = 
mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + rtc_attr.stc_base = obj_id; + + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); + if (ret) { + mlx5hws_err(ctx, "Failed to create RTC"); + goto pool_destroy; + } + + /* Create mirror RTC */ + obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + rtc_attr.ste_base = obj_id; + rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, true); + + obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + rtc_attr.stc_base = obj_id; + + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); + if (ret) { + mlx5hws_err(ctx, "Failed to create mirror RTC"); + goto destroy_rtc_0; + } + + mutex_unlock(&ctx->ctrl_lock); + + return table_ste; + +destroy_rtc_0: + mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id); +pool_destroy: + mlx5hws_pool_destroy(table_ste->pool); +free_ste: + mutex_unlock(&ctx->ctrl_lock); + kfree(table_ste); + return NULL; +} + +static void +hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx, + struct mlx5hws_matcher_action_ste *table_ste) +{ + mutex_lock(&ctx->ctrl_lock); + + mlx5hws_cmd_rtc_destroy(ctx->mdev, table_ste->rtc_1_id); + mlx5hws_cmd_rtc_destroy(ctx->mdev, table_ste->rtc_0_id); + mlx5hws_pool_destroy(table_ste->pool); + kfree(table_ste); + + mutex_unlock(&ctx->ctrl_lock); +} + +static int +hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, + struct mlx5hws_matcher_action_ste *table_ste, + struct mlx5hws_action *hit_ft_action, + struct mlx5hws_definer *range_definer, + u32 min, u32 max) +{ + struct mlx5hws_wqe_gta_data_seg_ste match_wqe_data = {0}; + struct mlx5hws_wqe_gta_data_seg_ste range_wqe_data = {0}; + struct mlx5hws_wqe_gta_ctrl_seg wqe_ctrl = {0}; + u32 no_use, used_rtc_0_id, used_rtc_1_id, ret; + struct mlx5hws_context_common_res *common_res; + struct mlx5hws_send_ste_attr ste_attr = {0}; + struct mlx5hws_send_engine *queue; + __be32 *wqe_data_arr; + + mutex_lock(&ctx->ctrl_lock); + + /* Get the control queue */ + queue = &ctx->send_queue[ctx->queues - 1]; + if (unlikely(mlx5hws_send_engine_err(queue))) { + ret = -EIO; + goto error; + } + + /* Init default send STE attributes */ + ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE; + ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE; + ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS; + ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA; + ste_attr.send_attr.user_data = &no_use; + ste_attr.send_attr.rule = NULL; + ste_attr.send_attr.fence = 1; + ste_attr.send_attr.notify_hw = true; + ste_attr.rtc_0 = table_ste->rtc_0_id; + ste_attr.rtc_1 = table_ste->rtc_1_id; + ste_attr.used_id_rtc_0 = &used_rtc_0_id; + ste_attr.used_id_rtc_1 = &used_rtc_1_id; + + common_res = &ctx->common_res[MLX5HWS_TABLE_TYPE_FDB]; + + /* init an empty match STE which will always hit */ + ste_attr.wqe_ctrl = &wqe_ctrl; + ste_attr.wqe_data = &match_wqe_data; + ste_attr.send_attr.match_definer_id = ctx->caps->trivial_match_definer; + + /* Fill WQE control data */ + wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_CTRL] = + htonl(common_res->default_stc->nop_ctr.offset); + wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_DW5] = + htonl(common_res->default_stc->nop_dw5.offset); + wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = + htonl(common_res->default_stc->nop_dw6.offset); + wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = + htonl(common_res->default_stc->nop_dw7.offset); + wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_CTRL] |= + 
htonl(MLX5HWS_ACTION_STC_IDX_LAST_COMBO2 << 29); + wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_HIT] = + htonl(hit_ft_action->stc[MLX5HWS_TABLE_TYPE_FDB].offset); + + wqe_data_arr = (__force __be32 *)&range_wqe_data; + + ste_attr.range_wqe_data = &range_wqe_data; + ste_attr.send_attr.len += MLX5HWS_WQE_SZ_GTA_DATA; + ste_attr.send_attr.range_definer_id = mlx5hws_definer_get_id(range_definer); + + /* Fill range matching fields, + * min/max_value_2 corresponds to match_dw_0 in its definer, + * min_value_2 sets in DW0 in the STE and max_value_2 sets in DW1 in the STE. + */ + wqe_data_arr[MLX5HWS_MATCHER_OFFSET_TAG_DW0] = htonl(min << 16); + wqe_data_arr[MLX5HWS_MATCHER_OFFSET_TAG_DW1] = htonl(max << 16); + + /* Send WQEs to FW */ + mlx5hws_send_stes_fw(ctx, queue, &ste_attr); + + /* Poll for completion */ + ret = mlx5hws_send_queue_action(ctx, ctx->queues - 1, + MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC); + if (ret) { + mlx5hws_err(ctx, "Failed to drain control queue"); + goto error; + } + + mutex_unlock(&ctx->ctrl_lock); + + return 0; + +error: + mutex_unlock(&ctx->ctrl_lock); + return ret; +} + +struct mlx5hws_action * +mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, + u32 field, + struct mlx5_flow_table *hit_ft, + struct mlx5_flow_table *miss_ft, + u32 min, u32 max, u32 flags) +{ + struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; + struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_action *hit_ft_action; + struct mlx5hws_definer *definer; + struct mlx5hws_action *action; + u32 miss_ft_id = miss_ft->id; + u32 hit_ft_id = hit_ft->id; + int ret; + + if (field != MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN || + min > 0xffff || max > 0xffff) { + mlx5hws_err(ctx, "Invalid match range parameters\n"); + return NULL; + } + + action = hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_RANGE); + if (!action) + return NULL; + + definer = hws_action_create_dest_match_range_definer(ctx); + if (!definer) + goto free_action; + + table_ste = hws_action_create_dest_match_range_table(ctx, definer, miss_ft_id); + if (!table_ste) + goto destroy_definer; + + hit_ft_action = mlx5hws_action_create_dest_table_num(ctx, hit_ft_id, flags); + if (!hit_ft_action) + goto destroy_table_ste; + + ret = hws_action_create_dest_match_range_fill_table(ctx, table_ste, + hit_ft_action, + definer, min, max); + if (ret) + goto destroy_hit_ft_action; + + action->range.table_ste = table_ste; + action->range.definer = definer; + action->range.hit_ft_action = hit_ft_action; + + /* Allocate STC for jumps to STE */ + mutex_lock(&ctx->ctrl_lock); + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; + stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + stc_attr.ste_table.ste = table_ste->ste; + stc_attr.ste_table.ste_pool = table_ste->pool; + stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; + + ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, MLX5HWS_TABLE_TYPE_FDB, + &action->stc[MLX5HWS_TABLE_TYPE_FDB]); + if (ret) + goto error_unlock; + + mutex_unlock(&ctx->ctrl_lock); + + return action; + +error_unlock: + mutex_unlock(&ctx->ctrl_lock); +destroy_hit_ft_action: + mlx5hws_action_destroy(hit_ft_action); +destroy_table_ste: + hws_action_destroy_dest_match_range_table(ctx, table_ste); +destroy_definer: + mlx5hws_definer_free(ctx, definer); +free_action: + kfree(action); + mlx5hws_err(ctx, "Failed to create action dest match range"); + return NULL; +} + +struct mlx5hws_action * +mlx5hws_action_create_last(struct 
mlx5hws_context *ctx, u32 flags) +{ + return hws_action_create_generic(ctx, flags, MLX5HWS_ACTION_TYP_LAST); +} + +struct mlx5hws_action * +mlx5hws_action_create_flow_sampler(struct mlx5hws_context *ctx, + u32 sampler_id, u32 flags) +{ + mlx5hws_err(ctx, "Flow sampler action - unsupported\n"); + return NULL; +} + +static void hws_action_destroy_hws(struct mlx5hws_action *action) +{ + u32 ext_reformat_id; + bool shared_arg; + u32 obj_id; + u32 i; + + switch (action->type) { + case MLX5HWS_ACTION_TYP_MISS: + case MLX5HWS_ACTION_TYP_TAG: + case MLX5HWS_ACTION_TYP_DROP: + case MLX5HWS_ACTION_TYP_CTR: + case MLX5HWS_ACTION_TYP_TBL: + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2: + case MLX5HWS_ACTION_TYP_ASO_METER: + case MLX5HWS_ACTION_TYP_PUSH_VLAN: + case MLX5HWS_ACTION_TYP_REMOVE_HEADER: + case MLX5HWS_ACTION_TYP_VPORT: + hws_action_destroy_stcs(action); + break; + case MLX5HWS_ACTION_TYP_POP_VLAN: + hws_action_destroy_stcs(action); + hws_action_put_shared_stc(action, MLX5HWS_CONTEXT_SHARED_STC_DOUBLE_POP); + break; + case MLX5HWS_ACTION_TYP_DEST_ARRAY: + hws_action_destroy_stcs(action); + mlx5hws_cmd_forward_tbl_destroy(action->ctx->mdev, action->dest_array.fw_island); + for (i = 0; i < action->dest_array.num_dest; i++) { + ext_reformat_id = action->dest_array.dest_list[i].ext_reformat_id; + if (ext_reformat_id) + mlx5hws_cmd_packet_reformat_destroy(action->ctx->mdev, + ext_reformat_id); + } + kfree(action->dest_array.dest_list); + break; + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2: + case MLX5HWS_ACTION_TYP_MODIFY_HDR: + shared_arg = false; + for (i = 0; i < action->modify_header.num_of_patterns; i++) { + hws_action_destroy_stcs(&action[i]); + if (action[i].modify_header.num_of_actions > 1) { + mlx5hws_pat_put_pattern(action[i].ctx, + action[i].modify_header.pat_id); + /* Save shared arg object to be freed after */ + obj_id = action[i].modify_header.arg_id; + shared_arg = true; + } + } + if (shared_arg) + mlx5hws_arg_destroy(action->ctx, obj_id); + break; + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3: + hws_action_put_shared_stc(action, MLX5HWS_CONTEXT_SHARED_STC_DECAP_L3); + for (i = 0; i < action->reformat.num_of_hdrs; i++) + hws_action_destroy_stcs(&action[i]); + mlx5hws_arg_destroy(action->ctx, action->reformat.arg_id); + break; + case MLX5HWS_ACTION_TYP_INSERT_HEADER: + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2: + for (i = 0; i < action->reformat.num_of_hdrs; i++) + hws_action_destroy_stcs(&action[i]); + mlx5hws_arg_destroy(action->ctx, action->reformat.arg_id); + break; + case MLX5HWS_ACTION_TYP_RANGE: + hws_action_destroy_stcs(action); + hws_action_destroy_dest_match_range_table(action->ctx, action->range.table_ste); + mlx5hws_definer_free(action->ctx, action->range.definer); + mlx5hws_action_destroy(action->range.hit_ft_action); + break; + case MLX5HWS_ACTION_TYP_LAST: + break; + default: + pr_warn("HWS: Invalid action type: %d\n", action->type); + } +} + +int mlx5hws_action_destroy(struct mlx5hws_action *action) +{ + hws_action_destroy_hws(action); + + kfree(action); + return 0; +} + +int mlx5hws_action_get_default_stc(struct mlx5hws_context *ctx, u8 tbl_type) +__must_hold(&ctx->ctrl_lock) +{ + struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; + struct mlx5hws_action_default_stc *default_stc; + int ret; + + if (ctx->common_res[tbl_type].default_stc) { + ctx->common_res[tbl_type].default_stc->refcount++; + return 0; + } + + default_stc = kzalloc(sizeof(*default_stc), GFP_KERNEL); + if (!default_stc) + return -ENOMEM; + + stc_attr.action_type = 
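+	/* The defaults allocated below are NOP STCs for the CTRL, DW5, DW6
+	 * and DW7 slots plus a default-allow hit STC; they are shared per
+	 * table type and refcounted, so every STE carries a valid STC index
+	 * even in action slots a rule does not use.
+	 */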
MLX5_IFC_STC_ACTION_TYPE_NOP; + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_DW0; + stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, + &default_stc->nop_ctr); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate default counter STC\n"); + goto free_default_stc; + } + + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_DW5; + ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, + &default_stc->nop_dw5); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate default NOP DW5 STC\n"); + goto free_nop_ctr; + } + + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_DW6; + ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, + &default_stc->nop_dw6); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate default NOP DW6 STC\n"); + goto free_nop_dw5; + } + + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_DW7; + ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, + &default_stc->nop_dw7); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate default NOP DW7 STC\n"); + goto free_nop_dw6; + } + + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_ALLOW; + + ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, + &default_stc->default_hit); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate default allow STC\n"); + goto free_nop_dw7; + } + + ctx->common_res[tbl_type].default_stc = default_stc; + ctx->common_res[tbl_type].default_stc->refcount++; + + return 0; + +free_nop_dw7: + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw7); +free_nop_dw6: + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw6); +free_nop_dw5: + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw5); +free_nop_ctr: + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_ctr); +free_default_stc: + kfree(default_stc); + return ret; +} + +void mlx5hws_action_put_default_stc(struct mlx5hws_context *ctx, u8 tbl_type) +__must_hold(&ctx->ctrl_lock) +{ + struct mlx5hws_action_default_stc *default_stc; + + default_stc = ctx->common_res[tbl_type].default_stc; + if (--default_stc->refcount) + return; + + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->default_hit); + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw7); + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw6); + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw5); + mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_ctr); + kfree(default_stc); + ctx->common_res[tbl_type].default_stc = NULL; +} + +static void hws_action_modify_write(struct mlx5hws_send_engine *queue, + u32 arg_idx, + u8 *arg_data, + u16 num_of_actions, + u32 nope_locations) +{ + u8 *new_arg_data = NULL; + int i, j; + + if (unlikely(nope_locations)) { + new_arg_data = kcalloc(num_of_actions, + MLX5HWS_MODIFY_ACTION_SIZE, GFP_KERNEL); + if (unlikely(!new_arg_data)) + return; + + for (i = 0, j = 0; i < num_of_actions; i++, j++) { + memcpy(&new_arg_data[j], arg_data, MLX5HWS_MODIFY_ACTION_SIZE); + if (BIT(i) & nope_locations) + j++; + } + } + + mlx5hws_arg_write(queue, NULL, arg_idx, + new_arg_data ? 
new_arg_data : arg_data, + num_of_actions * MLX5HWS_MODIFY_ACTION_SIZE); + + kfree(new_arg_data); +} + +void mlx5hws_action_prepare_decap_l3_data(u8 *src, u8 *dst, u16 num_of_actions) +{ + u8 *e_src; + int i; + + /* num_of_actions = remove l3l2 + 4/5 inserts + remove extra 2 bytes. + * Copy from the end of src to the start of dst, moving toward the end; + * the leftover 2 bytes come from the 14B or 18B L2 header. + */ + if (num_of_actions == DECAP_L3_NUM_ACTIONS_W_NO_VLAN) + e_src = src + MLX5HWS_ACTION_HDR_LEN_L2; + else + e_src = src + MLX5HWS_ACTION_HDR_LEN_L2_W_VLAN; + + /* Move dst over the first remove action + zero data */ + dst += MLX5HWS_ACTION_DOUBLE_SIZE; + /* Move dst over the first insert ctrl action */ + dst += MLX5HWS_ACTION_DOUBLE_SIZE / 2; + /* Actions: + * no vlan: r_h-insert_4b-insert_4b-insert_4b-insert_4b-remove_2b. + * with vlan: r_h-insert_4b-insert_4b-insert_4b-insert_4b-insert_4b-remove_2b. + * the loop is without the last insertion. + */ + for (i = 0; i < num_of_actions - 3; i++) { + e_src -= MLX5HWS_ACTION_INLINE_DATA_SIZE; + memcpy(dst, e_src, MLX5HWS_ACTION_INLINE_DATA_SIZE); /* data */ + dst += MLX5HWS_ACTION_DOUBLE_SIZE; + } + /* Copy the last 2 bytes after a gap of 2 bytes which will be removed */ + e_src -= MLX5HWS_ACTION_INLINE_DATA_SIZE / 2; + dst += MLX5HWS_ACTION_INLINE_DATA_SIZE / 2; + memcpy(dst, e_src, 2); +} + +static int +hws_action_get_shared_stc_offset(struct mlx5hws_context_common_res *common_res, + enum mlx5hws_context_shared_stc_type stc_type) +{ + return common_res->shared_stc[stc_type]->stc_chunk.offset; +} + +static struct mlx5hws_actions_wqe_setter * +hws_action_setter_find_first(struct mlx5hws_actions_wqe_setter *setter, + u8 req_flags) +{ + /* Use a new setter if requested flags are taken */ + while (setter->flags & req_flags) + setter++; + + /* Use the current setter if the required flags are not used */ + return setter; +} + +static void +hws_action_apply_stc(struct mlx5hws_actions_apply_data *apply, + enum mlx5hws_action_stc_idx stc_idx, + u8 action_idx) +{ + struct mlx5hws_action *action = apply->rule_action[action_idx].action; + + apply->wqe_ctrl->stc_ix[stc_idx] = + htonl(action->stc[apply->tbl_type].offset); +} + +static void +hws_action_setter_push_vlan(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + struct mlx5hws_rule_action *rule_action; + + rule_action = &apply->rule_action[setter->idx_double]; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = rule_action->push_vlan.vlan_hdr; + + hws_action_apply_stc(apply, MLX5HWS_ACTION_STC_IDX_DW6, setter->idx_double); + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; +} + +static void +hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + struct mlx5hws_rule_action *rule_action; + struct mlx5hws_action *action; + u32 arg_sz, arg_idx; + u8 *single_action; + __be32 stc_idx; + + rule_action = &apply->rule_action[setter->idx_double]; + action = rule_action->action; + + stc_idx = htonl(action->stc[apply->tbl_type].offset); + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; + + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; + + if (action->modify_header.num_of_actions == 1) { + if (action->modify_header.single_action_type == + MLX5_MODIFICATION_TYPE_COPY || + action->modify_header.single_action_type == + MLX5_MODIFICATION_TYPE_ADD_FIELD) { + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = 0; + 
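+			/* For COPY and ADD_FIELD the single modify action is
+			 * described entirely by its STC, so the inline data
+			 * slot is cleared and no argument DW is written.
+			 */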
return; + } + + if (action->flags & MLX5HWS_ACTION_FLAG_SHARED) + single_action = (u8 *)&action->modify_header.single_action; + else + single_action = rule_action->modify_header.data; + + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = + *(__be32 *)MLX5_ADDR_OF(set_action_in, single_action, data); + } else { + /* Argument offset multiple with number of args per these actions */ + arg_sz = mlx5hws_arg_get_arg_size(action->modify_header.max_num_of_actions); + arg_idx = rule_action->modify_header.offset * arg_sz; + + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); + + if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) { + apply->require_dep = 1; + hws_action_modify_write(apply->queue, + action->modify_header.arg_id + arg_idx, + rule_action->modify_header.data, + action->modify_header.num_of_actions, + action->modify_header.nope_locations); + } + } +} + +static void +hws_action_setter_insert_ptr(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + struct mlx5hws_rule_action *rule_action; + struct mlx5hws_action *action; + u32 arg_idx, arg_sz; + __be32 stc_idx; + + rule_action = &apply->rule_action[setter->idx_double]; + action = rule_action->action + rule_action->reformat.hdr_idx; + + /* Argument offset multiple on args required for header size */ + arg_sz = mlx5hws_arg_data_size_to_arg_size(action->reformat.max_hdr_sz); + arg_idx = rule_action->reformat.offset * arg_sz; + + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); + + stc_idx = htonl(action->stc[apply->tbl_type].offset); + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; + + if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) { + apply->require_dep = 1; + mlx5hws_arg_write(apply->queue, NULL, + action->reformat.arg_id + arg_idx, + rule_action->reformat.data, + action->reformat.header_size); + } +} + +static void +hws_action_setter_tnl_l3_to_l2(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + struct mlx5hws_rule_action *rule_action; + struct mlx5hws_action *action; + u32 arg_sz, arg_idx; + __be32 stc_idx; + + rule_action = &apply->rule_action[setter->idx_double]; + action = rule_action->action + rule_action->reformat.hdr_idx; + + /* Argument offset multiple on args required for num of actions */ + arg_sz = mlx5hws_arg_get_arg_size(action->modify_header.max_num_of_actions); + arg_idx = rule_action->reformat.offset * arg_sz; + + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); + + stc_idx = htonl(action->stc[apply->tbl_type].offset); + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; + + if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) { + apply->require_dep = 1; + mlx5hws_arg_decapl3_write(apply->queue, + action->modify_header.arg_id + arg_idx, + rule_action->reformat.data, + action->modify_header.num_of_actions); + } +} + +static void +hws_action_setter_aso(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + struct mlx5hws_rule_action *rule_action; + u32 exe_aso_ctrl; + u32 offset; + + rule_action = &apply->rule_action[setter->idx_double]; + + switch (rule_action->action->type) { + case MLX5HWS_ACTION_TYP_ASO_METER: + /* exe_aso_ctrl format: + * [STC only and reserved bits 29b][init_color 2b][meter_id 1b] + */ + offset = 
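+		/* Worked example (assuming two meters per ASO object):
+		 * aso_meter.offset = 5 yields object offset 2 and meter_id
+		 * bit 1, with init_color then shifted in above it.
+		 */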
rule_action->aso_meter.offset / MLX5_ASO_METER_NUM_PER_OBJ; + exe_aso_ctrl = rule_action->aso_meter.offset % MLX5_ASO_METER_NUM_PER_OBJ; + exe_aso_ctrl |= rule_action->aso_meter.init_color << + MLX5HWS_ACTION_METER_INIT_COLOR_OFFSET; + break; + default: + mlx5hws_err(rule_action->action->ctx, + "Unsupported ASO action type: %d\n", rule_action->action->type); + return; + } + + /* aso_object_offset format: [24B] */ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = htonl(offset); + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(exe_aso_ctrl); + + hws_action_apply_stc(apply, MLX5HWS_ACTION_STC_IDX_DW6, setter->idx_double); + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; +} + +static void +hws_action_setter_tag(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + struct mlx5hws_rule_action *rule_action; + + rule_action = &apply->rule_action[setter->idx_single]; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW5] = htonl(rule_action->tag.value); + hws_action_apply_stc(apply, MLX5HWS_ACTION_STC_IDX_DW5, setter->idx_single); +} + +static void +hws_action_setter_ctrl_ctr(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + struct mlx5hws_rule_action *rule_action; + + rule_action = &apply->rule_action[setter->idx_ctr]; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW0] = htonl(rule_action->counter.offset); + hws_action_apply_stc(apply, MLX5HWS_ACTION_STC_IDX_CTRL, setter->idx_ctr); +} + +static void +hws_action_setter_single(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW5] = 0; + hws_action_apply_stc(apply, MLX5HWS_ACTION_STC_IDX_DW5, setter->idx_single); +} + +static void +hws_action_setter_single_double_pop(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW5] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW5] = + htonl(hws_action_get_shared_stc_offset(apply->common_res, + MLX5HWS_CONTEXT_SHARED_STC_DOUBLE_POP)); +} + +static void +hws_action_setter_hit(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_HIT_LSB] = 0; + hws_action_apply_stc(apply, MLX5HWS_ACTION_STC_IDX_HIT, setter->idx_hit); +} + +static void +hws_action_setter_default_hit(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_HIT_LSB] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_HIT] = + htonl(apply->common_res->default_stc->default_hit.offset); +} + +static void +hws_action_setter_hit_next_action(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_HIT_LSB] = htonl(apply->next_direct_idx << 6); + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_HIT] = htonl(apply->jump_to_action_stc); +} + +static void +hws_action_setter_common_decap(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW5] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW5] = + htonl(hws_action_get_shared_stc_offset(apply->common_res, + MLX5HWS_CONTEXT_SHARED_STC_DECAP_L3)); +} + +static void +hws_action_setter_range(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + /* Always jump to index zero */ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_HIT_LSB] = 0; + 
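+	/* The HIT STC of a RANGE action was created as JUMP_TO_STE_TABLE,
+	 * so this hit jumps to the range STE table rather than to a flow
+	 * table.
+	 */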
hws_action_apply_stc(apply, MLX5HWS_ACTION_STC_IDX_HIT, setter->idx_hit); +} + +int mlx5hws_action_template_process(struct mlx5hws_action_template *at) +{ + struct mlx5hws_actions_wqe_setter *start_setter = at->setters + 1; + enum mlx5hws_action_type *action_type = at->action_type_arr; + struct mlx5hws_actions_wqe_setter *setter = at->setters; + struct mlx5hws_actions_wqe_setter *pop_setter = NULL; + struct mlx5hws_actions_wqe_setter *last_setter; + int i; + + /* Note: Given action combination must be valid */ + + /* Check if action were already processed */ + if (at->num_of_action_stes) + return 0; + + for (i = 0; i < MLX5HWS_ACTION_MAX_STE; i++) + setter[i].set_hit = &hws_action_setter_hit_next_action; + + /* The same action template setters can be used with jumbo or match + * STE, to support both cases we reserve the first setter for cases + * with jumbo STE to allow jump to the first action STE. + * This extra setter can be reduced in some cases on rule creation. + */ + setter = start_setter; + last_setter = start_setter; + + for (i = 0; i < at->num_actions; i++) { + switch (action_type[i]) { + case MLX5HWS_ACTION_TYP_DROP: + case MLX5HWS_ACTION_TYP_TBL: + case MLX5HWS_ACTION_TYP_DEST_ARRAY: + case MLX5HWS_ACTION_TYP_VPORT: + case MLX5HWS_ACTION_TYP_MISS: + /* Hit action */ + last_setter->flags |= ASF_HIT; + last_setter->set_hit = &hws_action_setter_hit; + last_setter->idx_hit = i; + break; + + case MLX5HWS_ACTION_TYP_RANGE: + last_setter->flags |= ASF_HIT; + last_setter->set_hit = &hws_action_setter_range; + last_setter->idx_hit = i; + break; + + case MLX5HWS_ACTION_TYP_POP_VLAN: + /* Single remove header to header */ + if (pop_setter) { + /* We have 2 pops, use the shared */ + pop_setter->set_single = &hws_action_setter_single_double_pop; + break; + } + setter = hws_action_setter_find_first(last_setter, + ASF_SINGLE1 | ASF_MODIFY | + ASF_INSERT); + setter->flags |= ASF_SINGLE1 | ASF_REMOVE; + setter->set_single = &hws_action_setter_single; + setter->idx_single = i; + pop_setter = setter; + break; + + case MLX5HWS_ACTION_TYP_PUSH_VLAN: + /* Double insert inline */ + setter = hws_action_setter_find_first(last_setter, ASF_DOUBLE | ASF_REMOVE); + setter->flags |= ASF_DOUBLE | ASF_INSERT; + setter->set_double = &hws_action_setter_push_vlan; + setter->idx_double = i; + break; + + case MLX5HWS_ACTION_TYP_MODIFY_HDR: + /* Double modify header list */ + setter = hws_action_setter_find_first(last_setter, ASF_DOUBLE | ASF_REMOVE); + setter->flags |= ASF_DOUBLE | ASF_MODIFY; + setter->set_double = &hws_action_setter_modify_header; + setter->idx_double = i; + break; + + case MLX5HWS_ACTION_TYP_ASO_METER: + /* Double ASO action */ + setter = hws_action_setter_find_first(last_setter, ASF_DOUBLE); + setter->flags |= ASF_DOUBLE; + setter->set_double = &hws_action_setter_aso; + setter->idx_double = i; + break; + + case MLX5HWS_ACTION_TYP_REMOVE_HEADER: + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2: + /* Single remove header to header */ + setter = hws_action_setter_find_first(last_setter, + ASF_SINGLE1 | ASF_MODIFY); + setter->flags |= ASF_SINGLE1 | ASF_REMOVE; + setter->set_single = &hws_action_setter_single; + setter->idx_single = i; + break; + + case MLX5HWS_ACTION_TYP_INSERT_HEADER: + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2: + /* Double insert header with pointer */ + setter = hws_action_setter_find_first(last_setter, ASF_DOUBLE | ASF_REMOVE); + setter->flags |= ASF_DOUBLE | ASF_INSERT; + setter->set_double = &hws_action_setter_insert_ptr; + setter->idx_double = i; + break; + + case 
MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3: + /* Single remove + Double insert header with pointer */ + setter = hws_action_setter_find_first(last_setter, + ASF_SINGLE1 | ASF_DOUBLE); + setter->flags |= ASF_SINGLE1 | ASF_DOUBLE; + setter->set_double = &hws_action_setter_insert_ptr; + setter->idx_double = i; + setter->set_single = &hws_action_setter_common_decap; + setter->idx_single = i; + break; + + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2: + /* Double modify header list with remove and push inline */ + setter = hws_action_setter_find_first(last_setter, ASF_DOUBLE | ASF_REMOVE); + setter->flags |= ASF_DOUBLE | ASF_MODIFY | ASF_INSERT; + setter->set_double = &hws_action_setter_tnl_l3_to_l2; + setter->idx_double = i; + break; + + case MLX5HWS_ACTION_TYP_TAG: + /* Single TAG action, search for any room from the start */ + setter = hws_action_setter_find_first(start_setter, ASF_SINGLE1); + setter->flags |= ASF_SINGLE1; + setter->set_single = &hws_action_setter_tag; + setter->idx_single = i; + break; + + case MLX5HWS_ACTION_TYP_CTR: + /* Control counter action + * TODO: Current counter executed first. Support is needed + * for single action counter action which is done last. + * Example: Decap + CTR + */ + setter = hws_action_setter_find_first(start_setter, ASF_CTR); + setter->flags |= ASF_CTR; + setter->set_ctr = &hws_action_setter_ctrl_ctr; + setter->idx_ctr = i; + break; + default: + pr_warn("HWS: Invalid action type in processing action template: action_type[%d]=%d\n", + i, action_type[i]); + return -EOPNOTSUPP; + } + + last_setter = max(setter, last_setter); + } + + /* Set default hit on the last STE if no hit action provided */ + if (!(last_setter->flags & ASF_HIT)) + last_setter->set_hit = &hws_action_setter_default_hit; + + at->num_of_action_stes = last_setter - start_setter + 1; + + /* Check if action template doesn't require any action DWs */ + at->only_term = (at->num_of_action_stes == 1) && + !(last_setter->flags & ~(ASF_CTR | ASF_HIT)); + + return 0; +} + +struct mlx5hws_action_template * +mlx5hws_action_template_create(enum mlx5hws_action_type action_type[]) +{ + struct mlx5hws_action_template *at; + u8 num_actions = 0; + int i; + + at = kzalloc(sizeof(*at), GFP_KERNEL); + if (!at) + return NULL; + + while (action_type[num_actions++] != MLX5HWS_ACTION_TYP_LAST) + ; + + at->num_actions = num_actions - 1; + at->action_type_arr = kcalloc(num_actions, sizeof(*action_type), GFP_KERNEL); + if (!at->action_type_arr) + goto free_at; + + for (i = 0; i < num_actions; i++) + at->action_type_arr[i] = action_type[i]; + + return at; + +free_at: + kfree(at); + return NULL; +} + +int mlx5hws_action_template_destroy(struct mlx5hws_action_template *at) +{ + kfree(at->action_type_arr); + kfree(at); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_action.h new file mode 100644 index 000000000000..bf5c1b241006 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_action.h @@ -0,0 +1,307 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_ACTION_H_ +#define MLX5HWS_ACTION_H_ + +/* Max number of STEs needed for a rule (including match) */ +#define MLX5HWS_ACTION_MAX_STE 20 + +/* Max number of internal subactions of ipv6_ext */ +#define MLX5HWS_ACTION_IPV6_EXT_MAX_SA 4 + +enum mlx5hws_action_stc_idx { + MLX5HWS_ACTION_STC_IDX_CTRL = 0, + MLX5HWS_ACTION_STC_IDX_HIT = 1, + 
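+	/* DW5-DW7 carry the per-STE action data: a single action takes one
+	 * DW, a double takes DW6+DW7, a triple takes DW5-DW7. The _LAST_
+	 * values below are the action counts programmed into the WQE
+	 * control segment for each combination.
+	 */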
MLX5HWS_ACTION_STC_IDX_DW5 = 2, + MLX5HWS_ACTION_STC_IDX_DW6 = 3, + MLX5HWS_ACTION_STC_IDX_DW7 = 4, + MLX5HWS_ACTION_STC_IDX_MAX = 5, + /* STC Jumbo STE combo: CTR, Hit */ + MLX5HWS_ACTION_STC_IDX_LAST_JUMBO_STE = 1, + /* STC combo1: CTR, SINGLE, DOUBLE, Hit */ + MLX5HWS_ACTION_STC_IDX_LAST_COMBO1 = 3, + /* STC combo2: CTR, 3 x SINGLE, Hit */ + MLX5HWS_ACTION_STC_IDX_LAST_COMBO2 = 4, + /* STC combo3: CTR, TRIPLE, Hit */ + MLX5HWS_ACTION_STC_IDX_LAST_COMBO3 = 2, +}; + +enum mlx5hws_action_offset { + MLX5HWS_ACTION_OFFSET_DW0 = 0, + MLX5HWS_ACTION_OFFSET_DW5 = 5, + MLX5HWS_ACTION_OFFSET_DW6 = 6, + MLX5HWS_ACTION_OFFSET_DW7 = 7, + MLX5HWS_ACTION_OFFSET_HIT = 3, + MLX5HWS_ACTION_OFFSET_HIT_LSB = 4, +}; + +enum { + MLX5HWS_ACTION_DOUBLE_SIZE = 8, + MLX5HWS_ACTION_INLINE_DATA_SIZE = 4, + MLX5HWS_ACTION_HDR_LEN_L2_MACS = 12, + MLX5HWS_ACTION_HDR_LEN_L2_VLAN = 4, + MLX5HWS_ACTION_HDR_LEN_L2_ETHER = 2, + MLX5HWS_ACTION_HDR_LEN_L2 = (MLX5HWS_ACTION_HDR_LEN_L2_MACS + + MLX5HWS_ACTION_HDR_LEN_L2_ETHER), + MLX5HWS_ACTION_HDR_LEN_L2_W_VLAN = (MLX5HWS_ACTION_HDR_LEN_L2 + + MLX5HWS_ACTION_HDR_LEN_L2_VLAN), + MLX5HWS_ACTION_REFORMAT_DATA_SIZE = 64, + DECAP_L3_NUM_ACTIONS_W_NO_VLAN = 6, + DECAP_L3_NUM_ACTIONS_W_VLAN = 7, +}; + +enum mlx5hws_action_setter_flag { + ASF_SINGLE1 = 1 << 0, + ASF_SINGLE2 = 1 << 1, + ASF_SINGLE3 = 1 << 2, + ASF_DOUBLE = ASF_SINGLE2 | ASF_SINGLE3, + ASF_TRIPLE = ASF_SINGLE1 | ASF_DOUBLE, + ASF_INSERT = 1 << 3, + ASF_REMOVE = 1 << 4, + ASF_MODIFY = 1 << 5, + ASF_CTR = 1 << 6, + ASF_HIT = 1 << 7, +}; + +struct mlx5hws_action_default_stc { + struct mlx5hws_pool_chunk nop_ctr; + struct mlx5hws_pool_chunk nop_dw5; + struct mlx5hws_pool_chunk nop_dw6; + struct mlx5hws_pool_chunk nop_dw7; + struct mlx5hws_pool_chunk default_hit; + u32 refcount; +}; + +struct mlx5hws_action_shared_stc { + struct mlx5hws_pool_chunk stc_chunk; + u32 refcount; +}; + +struct mlx5hws_actions_apply_data { + struct mlx5hws_send_engine *queue; + struct mlx5hws_rule_action *rule_action; + __be32 *wqe_data; + struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl; + u32 jump_to_action_stc; + struct mlx5hws_context_common_res *common_res; + enum mlx5hws_table_type tbl_type; + u32 next_direct_idx; + u8 require_dep; +}; + +struct mlx5hws_actions_wqe_setter; + +typedef void (*mlx5hws_action_setter_fp)(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter); + +struct mlx5hws_actions_wqe_setter { + mlx5hws_action_setter_fp set_single; + mlx5hws_action_setter_fp set_double; + mlx5hws_action_setter_fp set_triple; + mlx5hws_action_setter_fp set_hit; + mlx5hws_action_setter_fp set_ctr; + u8 idx_single; + u8 idx_double; + u8 idx_triple; + u8 idx_ctr; + u8 idx_hit; + u8 stage_idx; + u8 flags; +}; + +struct mlx5hws_action_template { + struct mlx5hws_actions_wqe_setter setters[MLX5HWS_ACTION_MAX_STE]; + enum mlx5hws_action_type *action_type_arr; + u8 num_of_action_stes; + u8 num_actions; + u8 only_term; +}; + +struct mlx5hws_action { + u8 type; + u8 flags; + struct mlx5hws_context *ctx; + union { + struct { + struct mlx5hws_pool_chunk stc[MLX5HWS_TABLE_TYPE_MAX]; + union { + struct { + u32 pat_id; + u32 arg_id; + __be64 single_action; + u32 nope_locations; + u8 num_of_patterns; + u8 single_action_type; + u8 num_of_actions; + u8 max_num_of_actions; + u8 require_reparse; + } modify_header; + struct { + u32 arg_id; + u32 header_size; + u16 max_hdr_sz; + u8 num_of_hdrs; + u8 anchor; + u8 e_anchor; + u8 offset; + bool encap; + u8 require_reparse; + } reformat; + struct { + u32 obj_id; + u8 return_reg_id; + } aso; + 
struct { + u16 vport_num; + u16 esw_owner_vhca_id; + bool esw_owner_vhca_id_valid; + } vport; + struct { + u32 obj_id; + } dest_obj; + struct { + struct mlx5hws_cmd_forward_tbl *fw_island; + size_t num_dest; + struct mlx5hws_cmd_set_fte_dest *dest_list; + } dest_array; + struct { + u8 type; + u8 start_anchor; + u8 end_anchor; + u8 num_of_words; + bool decap; + } insert_hdr; + struct { + /* PRM start anchor from which header will be removed */ + u8 anchor; + /* Header remove offset in bytes, from the start + * anchor to the location where remove header starts. + */ + u8 offset; + /* Indicates the removed header size in bytes */ + size_t size; + } remove_header; + struct { + struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_action *hit_ft_action; + struct mlx5hws_definer *definer; + } range; + }; + }; + + struct ibv_flow_action *flow_action; + u32 obj_id; + struct ibv_qp *qp; + }; +}; + +const char *mlx5hws_action_type_to_str(enum mlx5hws_action_type action_type); + +int mlx5hws_action_get_default_stc(struct mlx5hws_context *ctx, + u8 tbl_type); + +void mlx5hws_action_put_default_stc(struct mlx5hws_context *ctx, + u8 tbl_type); + +void mlx5hws_action_prepare_decap_l3_data(u8 *src, u8 *dst, + u16 num_of_actions); + +int mlx5hws_action_template_process(struct mlx5hws_action_template *at); + +bool mlx5hws_action_check_combo(struct mlx5hws_context *ctx, + enum mlx5hws_action_type *user_actions, + enum mlx5hws_table_type table_type); + +int mlx5hws_action_alloc_single_stc(struct mlx5hws_context *ctx, + struct mlx5hws_cmd_stc_modify_attr *stc_attr, + u32 table_type, + struct mlx5hws_pool_chunk *stc); + +void mlx5hws_action_free_single_stc(struct mlx5hws_context *ctx, + u32 table_type, + struct mlx5hws_pool_chunk *stc); + +static inline void +mlx5hws_action_setter_default_single(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW5] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW5] = + htonl(apply->common_res->default_stc->nop_dw5.offset); +} + +static inline void +mlx5hws_action_setter_default_double(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = + htonl(apply->common_res->default_stc->nop_dw6.offset); + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = + htonl(apply->common_res->default_stc->nop_dw7.offset); +} + +static inline void +mlx5hws_action_setter_default_ctr(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter) +{ + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW0] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_CTRL] = + htonl(apply->common_res->default_stc->nop_ctr.offset); +} + +static inline void +mlx5hws_action_apply_setter(struct mlx5hws_actions_apply_data *apply, + struct mlx5hws_actions_wqe_setter *setter, + bool is_jumbo) +{ + u8 num_of_actions; + + /* Set control counter */ + if (setter->set_ctr) + setter->set_ctr(apply, setter); + else + mlx5hws_action_setter_default_ctr(apply, setter); + + if (!is_jumbo) { + if (unlikely(setter->set_triple)) { + /* Set triple on match */ + setter->set_triple(apply, setter); + num_of_actions = MLX5HWS_ACTION_STC_IDX_LAST_COMBO3; + } else { + /* Set single and double on match */ + if (setter->set_single) + setter->set_single(apply, setter); + else + mlx5hws_action_setter_default_single(apply, setter); + + if 
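+		/* an unused double slot falls back to NOP STCs in DW6/DW7 below */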
(setter->set_double) + setter->set_double(apply, setter); + else + mlx5hws_action_setter_default_double(apply, setter); + + num_of_actions = setter->set_double ? + MLX5HWS_ACTION_STC_IDX_LAST_COMBO1 : + MLX5HWS_ACTION_STC_IDX_LAST_COMBO2; + } + } else { + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW5] = 0; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW5] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = 0; + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; + num_of_actions = MLX5HWS_ACTION_STC_IDX_LAST_JUMBO_STE; + } + + /* Set next/final hit action */ + setter->set_hit(apply, setter); + + /* Set number of actions */ + apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_CTRL] |= + htonl(num_of_actions << 29); +} + +#endif /* MLX5HWS_ACTION_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_buddy.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_buddy.c new file mode 100644 index 000000000000..e6ed66202a40 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_buddy.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" +#include "mlx5hws_buddy.h" + +static int hws_buddy_init(struct mlx5hws_buddy_mem *buddy, u32 max_order) +{ + int i, s, ret = 0; + + buddy->max_order = max_order; + + buddy->bitmap = kcalloc(buddy->max_order + 1, + sizeof(*buddy->bitmap), + GFP_KERNEL); + if (!buddy->bitmap) + return -ENOMEM; + + buddy->num_free = kcalloc(buddy->max_order + 1, + sizeof(*buddy->num_free), + GFP_KERNEL); + if (!buddy->num_free) { + ret = -ENOMEM; + goto err_out_free_bits; + } + + for (i = 0; i <= (int)buddy->max_order; ++i) { + s = 1 << (buddy->max_order - i); + + buddy->bitmap[i] = bitmap_zalloc(s, GFP_KERNEL); + if (!buddy->bitmap[i]) { + ret = -ENOMEM; + goto err_out_free_num_free; + } + } + + bitmap_set(buddy->bitmap[buddy->max_order], 0, 1); + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free_num_free: + for (i = 0; i <= (int)buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + + kfree(buddy->num_free); + +err_out_free_bits: + kfree(buddy->bitmap); + return ret; +} + +struct mlx5hws_buddy_mem *mlx5hws_buddy_create(u32 max_order) +{ + struct mlx5hws_buddy_mem *buddy; + + buddy = kzalloc(sizeof(*buddy), GFP_KERNEL); + if (!buddy) + return NULL; + + if (hws_buddy_init(buddy, max_order)) + goto free_buddy; + + return buddy; + +free_buddy: + kfree(buddy); + return NULL; +} + +void mlx5hws_buddy_cleanup(struct mlx5hws_buddy_mem *buddy) +{ + int i; + + for (i = 0; i <= (int)buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + + kfree(buddy->num_free); + kfree(buddy->bitmap); +} + +static int hws_buddy_find_free_seg(struct mlx5hws_buddy_mem *buddy, + u32 start_order, + u32 *segment, + u32 *order) +{ + unsigned int seg, order_iter, m; + + for (order_iter = start_order; + order_iter <= buddy->max_order; ++order_iter) { + if (!buddy->num_free[order_iter]) + continue; + + m = 1 << (buddy->max_order - order_iter); + seg = find_first_bit(buddy->bitmap[order_iter], m); + + if (WARN(seg >= m, + "ICM Buddy: failed finding free mem for order %d\n", + order_iter)) + return -ENOMEM; + + break; + } + + if (order_iter > buddy->max_order) + return -ENOMEM; + + *segment = seg; + *order = order_iter; + return 0; +} + +int mlx5hws_buddy_alloc_mem(struct mlx5hws_buddy_mem *buddy, u32 order) +{ + 
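+	/* Classic buddy allocation. For example, with max_order = 2 and
+	 * only the order-2 segment free, an order-0 request claims the
+	 * order-2 bit, splits it into two order-1 buddies (marking one
+	 * free), splits again at order 0 and returns segment 0; freeing
+	 * that segment walks back up, re-merging free buddies.
+	 */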
u32 seg, order_iter, err; + + err = hws_buddy_find_free_seg(buddy, order, &seg, &order_iter); + if (err) + return err; + + bitmap_clear(buddy->bitmap[order_iter], seg, 1); + --buddy->num_free[order_iter]; + + while (order_iter > order) { + --order_iter; + seg <<= 1; + bitmap_set(buddy->bitmap[order_iter], seg ^ 1, 1); + ++buddy->num_free[order_iter]; + } + + seg <<= order; + + return seg; +} + +void mlx5hws_buddy_free_mem(struct mlx5hws_buddy_mem *buddy, u32 seg, u32 order) +{ + seg >>= order; + + while (test_bit(seg ^ 1, buddy->bitmap[order])) { + bitmap_clear(buddy->bitmap[order], seg ^ 1, 1); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + + bitmap_set(buddy->bitmap[order], seg, 1); + ++buddy->num_free[order]; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_buddy.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_buddy.h new file mode 100644 index 000000000000..338c44bbedaf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_buddy.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_BUDDY_H_ +#define MLX5HWS_BUDDY_H_ + +struct mlx5hws_buddy_mem { + unsigned long **bitmap; + unsigned int *num_free; + u32 max_order; +}; + +struct mlx5hws_buddy_mem *mlx5hws_buddy_create(u32 max_order); + +void mlx5hws_buddy_cleanup(struct mlx5hws_buddy_mem *buddy); + +int mlx5hws_buddy_alloc_mem(struct mlx5hws_buddy_mem *buddy, u32 order); + +void mlx5hws_buddy_free_mem(struct mlx5hws_buddy_mem *buddy, u32 seg, u32 order); + +#endif /* MLX5HWS_BUDDY_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.c new file mode 100644 index 000000000000..bd52b05db367 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.c @@ -0,0 +1,997 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" + +static u16 hws_bwc_gen_queue_idx(struct mlx5hws_context *ctx) +{ + /* assign random queue */ + return get_random_u8() % mlx5hws_bwc_queues(ctx); +} + +static u16 +hws_bwc_get_burst_th(struct mlx5hws_context *ctx, u16 queue_id) +{ + return min(ctx->send_queue[queue_id].num_entries / 2, + MLX5HWS_BWC_MATCHER_REHASH_BURST_TH); +} + +static struct mutex * +hws_bwc_get_queue_lock(struct mlx5hws_context *ctx, u16 idx) +{ + return &ctx->bwc_send_queue_locks[idx]; +} + +static void hws_bwc_lock_all_queues(struct mlx5hws_context *ctx) +{ + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mutex *queue_lock; /* Protect the queue */ + int i; + + for (i = 0; i < bwc_queues; i++) { + queue_lock = hws_bwc_get_queue_lock(ctx, i); + mutex_lock(queue_lock); + } +} + +static void hws_bwc_unlock_all_queues(struct mlx5hws_context *ctx) +{ + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mutex *queue_lock; /* Protect the queue */ + int i = bwc_queues; + + while (i--) { + queue_lock = hws_bwc_get_queue_lock(ctx, i); + mutex_unlock(queue_lock); + } +} + +static void hws_bwc_matcher_init_attr(struct mlx5hws_matcher_attr *attr, + u32 priority, + u8 size_log) +{ + memset(attr, 0, sizeof(*attr)); + + attr->priority = priority; + attr->optimize_using_rule_idx = 0; + attr->mode = MLX5HWS_MATCHER_RESOURCE_MODE_RULE; + attr->optimize_flow_src = MLX5HWS_MATCHER_FLOW_SRC_ANY; + attr->insert_mode = MLX5HWS_MATCHER_INSERT_BY_HASH; + attr->distribute_mode = 
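+	/* BWC matchers always insert and distribute by hash, start at the
+	 * initial size_log passed by the caller and are marked resizable
+	 * so that rehash can grow them later.
+	 */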
MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH; + attr->rule.num_log = size_log; + attr->resizable = true; + attr->max_num_of_at_attach = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; +} + +int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table, + u32 priority, + u8 match_criteria_enable, + struct mlx5hws_match_parameters *mask, + enum mlx5hws_action_type action_types[]) +{ + enum mlx5hws_action_type init_action_types[1] = { MLX5HWS_ACTION_TYP_LAST }; + struct mlx5hws_context *ctx = table->ctx; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_matcher_attr attr = {0}; + int i; + + bwc_matcher->rules = kcalloc(bwc_queues, sizeof(*bwc_matcher->rules), GFP_KERNEL); + if (!bwc_matcher->rules) + goto err; + + for (i = 0; i < bwc_queues; i++) + INIT_LIST_HEAD(&bwc_matcher->rules[i]); + + hws_bwc_matcher_init_attr(&attr, + priority, + MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG); + + bwc_matcher->priority = priority; + bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + + /* create dummy action template */ + bwc_matcher->at[0] = + mlx5hws_action_template_create(action_types ? + action_types : init_action_types); + if (!bwc_matcher->at[0]) { + mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n"); + goto free_bwc_matcher_rules; + } + + bwc_matcher->num_of_at = 1; + + bwc_matcher->mt = mlx5hws_match_template_create(ctx, + mask->match_buf, + mask->match_sz, + match_criteria_enable); + if (!bwc_matcher->mt) { + mlx5hws_err(table->ctx, "BWC matcher: failed creating match template\n"); + goto free_at; + } + + bwc_matcher->matcher = mlx5hws_matcher_create(table, + &bwc_matcher->mt, 1, + &bwc_matcher->at[0], + bwc_matcher->num_of_at, + &attr); + if (!bwc_matcher->matcher) { + mlx5hws_err(table->ctx, "BWC matcher: failed creating HWS matcher\n"); + goto free_mt; + } + + return 0; + +free_mt: + mlx5hws_match_template_destroy(bwc_matcher->mt); +free_at: + mlx5hws_action_template_destroy(bwc_matcher->at[0]); +free_bwc_matcher_rules: + kfree(bwc_matcher->rules); +err: + return -EINVAL; +} + +struct mlx5hws_bwc_matcher * +mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, + u32 priority, + u8 match_criteria_enable, + struct mlx5hws_match_parameters *mask) +{ + struct mlx5hws_bwc_matcher *bwc_matcher; + bool is_complex; + int ret; + + if (!mlx5hws_context_bwc_supported(table->ctx)) { + mlx5hws_err(table->ctx, + "BWC matcher: context created w/o BWC API compatibility\n"); + return NULL; + } + + bwc_matcher = kzalloc(sizeof(*bwc_matcher), GFP_KERNEL); + if (!bwc_matcher) + return NULL; + + /* Check if the required match params can be all matched + * in single STE, otherwise complex matcher is needed. 
+ */ + + is_complex = mlx5hws_bwc_match_params_is_complex(table->ctx, match_criteria_enable, mask); + if (is_complex) + ret = mlx5hws_bwc_matcher_create_complex(bwc_matcher, + table, + priority, + match_criteria_enable, + mask); + else + ret = mlx5hws_bwc_matcher_create_simple(bwc_matcher, + table, + priority, + match_criteria_enable, + mask, + NULL); + if (ret) + goto free_bwc_matcher; + + return bwc_matcher; + +free_bwc_matcher: + kfree(bwc_matcher); + + return NULL; +} + +int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + int i; + + mlx5hws_matcher_destroy(bwc_matcher->matcher); + bwc_matcher->matcher = NULL; + + for (i = 0; i < bwc_matcher->num_of_at; i++) + mlx5hws_action_template_destroy(bwc_matcher->at[i]); + + mlx5hws_match_template_destroy(bwc_matcher->mt); + kfree(bwc_matcher->rules); + + return 0; +} + +int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + if (bwc_matcher->num_of_rules) + mlx5hws_err(bwc_matcher->matcher->tbl->ctx, + "BWC matcher destroy: matcher still has %d rules\n", + bwc_matcher->num_of_rules); + + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + + kfree(bwc_matcher); + return 0; +} + +static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, + u16 queue_id, + u32 *pending_rules, + bool drain) +{ + struct mlx5hws_flow_op_result comp[MLX5HWS_BWC_MATCHER_REHASH_BURST_TH]; + u16 burst_th = hws_bwc_get_burst_th(ctx, queue_id); + bool got_comp = *pending_rules >= burst_th; + bool queue_full; + int err = 0; + int ret; + int i; + + /* Check if there are any completions at all */ + if (!got_comp && !drain) + return 0; + + queue_full = mlx5hws_send_engine_full(&ctx->send_queue[queue_id]); + while (queue_full || ((got_comp || drain) && *pending_rules)) { + ret = mlx5hws_send_queue_poll(ctx, queue_id, comp, burst_th); + if (unlikely(ret < 0)) { + mlx5hws_err(ctx, "BWC poll error: polling queue %d returned %d\n", + queue_id, ret); + return -EINVAL; + } + + if (ret) { + (*pending_rules) -= ret; + for (i = 0; i < ret; i++) { + if (unlikely(comp[i].status != MLX5HWS_FLOW_OP_SUCCESS)) { + mlx5hws_err(ctx, + "BWC poll error: polling queue %d returned completion with error\n", + queue_id); + err = -EINVAL; + } + } + queue_full = false; + } + + got_comp = !!ret; + } + + return err; +} + +void +mlx5hws_bwc_rule_fill_attr(struct mlx5hws_bwc_matcher *bwc_matcher, + u16 bwc_queue_idx, + u32 flow_source, + struct mlx5hws_rule_attr *rule_attr) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + + /* no use of INSERT_BY_INDEX in bwc rule */ + rule_attr->rule_idx = 0; + + /* notify HW at each rule insertion/deletion */ + rule_attr->burst = 0; + + /* We don't need user data, but the API requires it to exist */ + rule_attr->user_data = (void *)0xFACADE; + + rule_attr->queue_id = mlx5hws_bwc_get_queue_id(ctx, bwc_queue_idx); + rule_attr->flow_source = flow_source; +} + +struct mlx5hws_bwc_rule * +mlx5hws_bwc_rule_alloc(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_bwc_rule *bwc_rule; + + bwc_rule = kzalloc(sizeof(*bwc_rule), GFP_KERNEL); + if (unlikely(!bwc_rule)) + goto out_err; + + bwc_rule->rule = kzalloc(sizeof(*bwc_rule->rule), GFP_KERNEL); + if (unlikely(!bwc_rule->rule)) + goto free_rule; + + bwc_rule->bwc_matcher = bwc_matcher; + return bwc_rule; + +free_rule: + kfree(bwc_rule); +out_err: + return NULL; +} + +void mlx5hws_bwc_rule_free(struct mlx5hws_bwc_rule *bwc_rule) +{ + if (likely(bwc_rule->rule)) + kfree(bwc_rule->rule); + kfree(bwc_rule); +} + +static void 
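+/* Rules are kept on per-queue lists: each BWC rule is pinned to the queue
+ * chosen at insertion, which lets rehash walk and move rules queue by queue.
+ */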
hws_bwc_rule_list_add(struct mlx5hws_bwc_rule *bwc_rule, u16 idx) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + bwc_matcher->num_of_rules++; + bwc_rule->bwc_queue_idx = idx; + list_add(&bwc_rule->list_node, &bwc_matcher->rules[idx]); +} + +static void hws_bwc_rule_list_remove(struct mlx5hws_bwc_rule *bwc_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + bwc_matcher->num_of_rules--; + list_del_init(&bwc_rule->list_node); +} + +static int +hws_bwc_rule_destroy_hws_async(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_rule_attr *attr) +{ + return mlx5hws_rule_destroy(bwc_rule->rule, attr); +} + +static int +hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_rule_attr *rule_attr) +{ + struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_flow_op_result completion; + int ret; + + ret = hws_bwc_rule_destroy_hws_async(bwc_rule, rule_attr); + if (unlikely(ret)) + return ret; + + do { + ret = mlx5hws_send_queue_poll(ctx, rule_attr->queue_id, &completion, 1); + } while (ret != 1); + + if (unlikely(completion.status != MLX5HWS_FLOW_OP_SUCCESS || + (bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED && + bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING))) { + mlx5hws_err(ctx, "Failed destroying BWC rule: completion %d, rule status %d\n", + completion.status, bwc_rule->rule->status); + return -EINVAL; + } + + return 0; +} + +int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + u16 idx = bwc_rule->bwc_queue_idx; + struct mlx5hws_rule_attr attr; + struct mutex *queue_lock; /* Protect the queue */ + int ret; + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, 0, &attr); + + queue_lock = hws_bwc_get_queue_lock(ctx, idx); + + mutex_lock(queue_lock); + + ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr); + hws_bwc_rule_list_remove(bwc_rule); + + mutex_unlock(queue_lock); + + return ret; +} + +int mlx5hws_bwc_rule_destroy(struct mlx5hws_bwc_rule *bwc_rule) +{ + int ret; + + ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); + + mlx5hws_bwc_rule_free(bwc_rule); + return ret; +} + +static int +hws_bwc_rule_create_async(struct mlx5hws_bwc_rule *bwc_rule, + u32 *match_param, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[], + struct mlx5hws_rule_attr *rule_attr) +{ + return mlx5hws_rule_create(bwc_rule->bwc_matcher->matcher, + 0, /* only one match template supported */ + match_param, + at_idx, + rule_actions, + rule_attr, + bwc_rule->rule); +} + +static int +hws_bwc_rule_create_sync(struct mlx5hws_bwc_rule *bwc_rule, + u32 *match_param, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[], + struct mlx5hws_rule_attr *rule_attr) + +{ + struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx; + u32 expected_completions = 1; + int ret; + + ret = hws_bwc_rule_create_async(bwc_rule, match_param, + at_idx, rule_actions, + rule_attr); + if (unlikely(ret)) + return ret; + + ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + + return ret; +} + +static int +hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[], + struct mlx5hws_rule_attr *rule_attr) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + u32 expected_completions = 1; + int 
ret; + + ret = mlx5hws_rule_action_update(bwc_rule->rule, + at_idx, + rule_actions, + rule_attr); + if (unlikely(ret)) + return ret; + + ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + if (unlikely(ret)) + mlx5hws_err(ctx, "Failed updating BWC rule (%d)\n", ret); + + return ret; +} + +static bool +hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps; + + return bwc_matcher->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH >= + caps->ste_alloc_log_max - 1; +} + +static bool +hws_bwc_matcher_rehash_size_needed(struct mlx5hws_bwc_matcher *bwc_matcher, + u32 num_of_rules) +{ + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) + return false; + + if (unlikely((num_of_rules * 100 / MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH) >= + (1UL << bwc_matcher->size_log))) + return true; + + return false; +} + +static void +hws_bwc_rule_actions_to_action_types(struct mlx5hws_rule_action rule_actions[], + enum mlx5hws_action_type action_types[]) +{ + int i = 0; + + for (i = 0; + rule_actions[i].action && (rule_actions[i].action->type != MLX5HWS_ACTION_TYP_LAST); + i++) { + action_types[i] = (enum mlx5hws_action_type)rule_actions[i].action->type; + } + + action_types[i] = MLX5HWS_ACTION_TYP_LAST; +} + +static int +hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_rule_action rule_actions[]) +{ + enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS]; + + hws_bwc_rule_actions_to_action_types(rule_actions, action_types); + + bwc_matcher->at[bwc_matcher->num_of_at] = + mlx5hws_action_template_create(action_types); + + if (unlikely(!bwc_matcher->at[bwc_matcher->num_of_at])) + return -ENOMEM; + + bwc_matcher->num_of_at++; + return 0; +} + +static int +hws_bwc_matcher_extend_size(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_cmd_query_caps *caps = ctx->caps; + + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) { + mlx5hws_err(ctx, "Can't resize matcher: depth exceeds limit %d\n", + caps->rtc_log_depth_max); + return -ENOMEM; + } + + bwc_matcher->size_log = + min(bwc_matcher->size_log + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, + caps->ste_alloc_log_max - MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH); + + return 0; +} + +static int +hws_bwc_matcher_find_at(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_rule_action rule_actions[]) +{ + enum mlx5hws_action_type *action_type_arr; + int i, j; + + /* start from index 1 - first action template is a dummy */ + for (i = 1; i < bwc_matcher->num_of_at; i++) { + j = 0; + action_type_arr = bwc_matcher->at[i]->action_type_arr; + + while (rule_actions[j].action && + rule_actions[j].action->type != MLX5HWS_ACTION_TYP_LAST) { + if (action_type_arr[j] != rule_actions[j].action->type) + break; + j++; + } + + if (action_type_arr[j] == MLX5HWS_ACTION_TYP_LAST && + (!rule_actions[j].action || + rule_actions[j].action->type == MLX5HWS_ACTION_TYP_LAST)) + return i; + } + + return -1; +} + +static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_bwc_rule **bwc_rules; + struct mlx5hws_rule_attr rule_attr; + u32 *pending_rules; + int i, j, ret = 0; + bool all_done; + u16 burst_th; + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + + pending_rules = 
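+	/* Move scheme: keep a cursor into every per-queue rule list, post
+	 * up to burst_th move WQEs per queue per pass and poll completions
+	 * as we go; pending_rules[] tracks in-flight moves per queue until
+	 * the final drain.
+	 */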
kcalloc(bwc_queues, sizeof(*pending_rules), GFP_KERNEL); + if (!pending_rules) + return -ENOMEM; + + bwc_rules = kcalloc(bwc_queues, sizeof(*bwc_rules), GFP_KERNEL); + if (!bwc_rules) { + ret = -ENOMEM; + goto free_pending_rules; + } + + for (i = 0; i < bwc_queues; i++) { + if (list_empty(&bwc_matcher->rules[i])) + bwc_rules[i] = NULL; + else + bwc_rules[i] = list_first_entry(&bwc_matcher->rules[i], + struct mlx5hws_bwc_rule, + list_node); + } + + do { + all_done = true; + + for (i = 0; i < bwc_queues; i++) { + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + burst_th = hws_bwc_get_burst_th(ctx, rule_attr.queue_id); + + for (j = 0; j < burst_th && bwc_rules[i]; j++) { + rule_attr.burst = !!((j + 1) % burst_th); + ret = mlx5hws_matcher_resize_rule_move(bwc_matcher->matcher, + bwc_rules[i]->rule, + &rule_attr); + if (unlikely(ret)) { + mlx5hws_err(ctx, + "Moving BWC rule failed during rehash (%d)\n", + ret); + goto free_bwc_rules; + } + + all_done = false; + pending_rules[i]++; + bwc_rules[i] = list_is_last(&bwc_rules[i]->list_node, + &bwc_matcher->rules[i]) ? + NULL : list_next_entry(bwc_rules[i], list_node); + + ret = hws_bwc_queue_poll(ctx, rule_attr.queue_id, + &pending_rules[i], false); + if (unlikely(ret)) + goto free_bwc_rules; + } + } + } while (!all_done); + + /* drain all the bwc queues */ + for (i = 0; i < bwc_queues; i++) { + if (pending_rules[i]) { + u16 queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + + mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]); + ret = hws_bwc_queue_poll(ctx, queue_id, + &pending_rules[i], true); + if (unlikely(ret)) + goto free_bwc_rules; + } + } + +free_bwc_rules: + kfree(bwc_rules); +free_pending_rules: + kfree(pending_rules); + + return ret; +} + +static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + return hws_bwc_matcher_move_all_simple(bwc_matcher); +} + +static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher_attr matcher_attr = {0}; + struct mlx5hws_matcher *old_matcher; + struct mlx5hws_matcher *new_matcher; + int ret; + + hws_bwc_matcher_init_attr(&matcher_attr, + bwc_matcher->priority, + bwc_matcher->size_log); + + old_matcher = bwc_matcher->matcher; + new_matcher = mlx5hws_matcher_create(old_matcher->tbl, + &bwc_matcher->mt, 1, + bwc_matcher->at, + bwc_matcher->num_of_at, + &matcher_attr); + if (!new_matcher) { + mlx5hws_err(ctx, "Rehash error: matcher creation failed\n"); + return -ENOMEM; + } + + ret = mlx5hws_matcher_resize_set_target(old_matcher, new_matcher); + if (ret) { + mlx5hws_err(ctx, "Rehash error: failed setting resize target\n"); + return ret; + } + + ret = hws_bwc_matcher_move_all(bwc_matcher); + if (ret) { + mlx5hws_err(ctx, "Rehash error: moving rules failed\n"); + return -ENOMEM; + } + + bwc_matcher->matcher = new_matcher; + mlx5hws_matcher_destroy(old_matcher); + + return 0; +} + +static int +hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + u32 num_of_rules; + int ret; + + /* If the current matcher size is already at its max size, we can't + * do the rehash. Skip it and try adding the rule again - perhaps + * there was some change. + */ + if (hws_bwc_matcher_size_maxed_out(bwc_matcher)) + return 0; + + /* It is possible that other rule has already performed rehash. + * Need to check again if we really need rehash. + * If the reason for rehash was size, but not any more - skip rehash. 
+ */
+ num_of_rules = __atomic_load_n(&bwc_matcher->num_of_rules, __ATOMIC_RELAXED);
+ if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))
+ return 0;
+
+ /* Now that all the checking is done - do the rehash:
+ * - extend match RTC size
+ * - create new matcher
+ * - move all the rules to the new matcher
+ * - destroy the old matcher
+ */
+
+ ret = hws_bwc_matcher_extend_size(bwc_matcher);
+ if (ret)
+ return ret;
+
+ return hws_bwc_matcher_move(bwc_matcher);
+}
+
+static int
+hws_bwc_matcher_rehash_at(struct mlx5hws_bwc_matcher *bwc_matcher)
+{
+ /* Rehash by action template doesn't require any additional checking.
+ * The bwc_matcher already contains the new action template.
+ * Just do the usual rehash:
+ * - create new matcher
+ * - move all the rules to the new matcher
+ * - destroy the old matcher
+ */
+ return hws_bwc_matcher_move(bwc_matcher);
+}
+
+int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
+ u32 *match_param,
+ struct mlx5hws_rule_action rule_actions[],
+ u32 flow_source,
+ u16 bwc_queue_idx)
+{
+ struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
+ struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
+ struct mlx5hws_rule_attr rule_attr;
+ struct mutex *queue_lock; /* Protect the queue */
+ u32 num_of_rules;
+ int ret = 0;
+ int at_idx;
+
+ mlx5hws_bwc_rule_fill_attr(bwc_matcher, bwc_queue_idx, flow_source, &rule_attr);
+
+ queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx);
+
+ mutex_lock(queue_lock);
+
+ /* check if rehash needed due to missing action template */
+ at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
+ if (unlikely(at_idx < 0)) {
+ /* we need to extend BWC matcher action templates array */
+ mutex_unlock(queue_lock);
+ hws_bwc_lock_all_queues(ctx);
+
+ ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions);
+ if (unlikely(ret)) {
+ hws_bwc_unlock_all_queues(ctx);
+ return ret;
+ }
+
+ /* action templates array was extended, we need the last idx */
+ at_idx = bwc_matcher->num_of_at - 1;
+
+ ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
+ bwc_matcher->at[at_idx]);
+ if (unlikely(ret)) {
+ /* Action template attach failed, possibly due to
+ * requiring more action STEs.
+ * Need to attempt creating new matcher with all
+ * the action templates, including the new one.
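+ * Rehashing by AT recreates the matcher at the same size_log,
+ * but with the extended action template array, and then moves
+ * all the rules over, exactly like a rehash by size.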
+ */
+ ret = hws_bwc_matcher_rehash_at(bwc_matcher);
+ if (unlikely(ret)) {
+ mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]);
+ bwc_matcher->at[at_idx] = NULL;
+ bwc_matcher->num_of_at--;
+
+ hws_bwc_unlock_all_queues(ctx);
+
+ mlx5hws_err(ctx,
+ "BWC rule insertion: rehash AT failed (%d)\n", ret);
+ return ret;
+ }
+ }
+
+ hws_bwc_unlock_all_queues(ctx);
+ mutex_lock(queue_lock);
+ }
+
+ /* check if number of rules require rehash */
+ num_of_rules = bwc_matcher->num_of_rules;
+
+ if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) {
+ mutex_unlock(queue_lock);
+
+ hws_bwc_lock_all_queues(ctx);
+ ret = hws_bwc_matcher_rehash_size(bwc_matcher);
+ hws_bwc_unlock_all_queues(ctx);
+
+ if (ret) {
+ mlx5hws_err(ctx, "BWC rule insertion: rehash size [%d -> %d] failed (%d)\n",
+ bwc_matcher->size_log - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP,
+ bwc_matcher->size_log,
+ ret);
+ return ret;
+ }
+
+ mutex_lock(queue_lock);
+ }
+
+ ret = hws_bwc_rule_create_sync(bwc_rule,
+ match_param,
+ at_idx,
+ rule_actions,
+ &rule_attr);
+ if (likely(!ret)) {
+ hws_bwc_rule_list_add(bwc_rule, bwc_queue_idx);
+ mutex_unlock(queue_lock);
+ return 0; /* rule inserted successfully */
+ }
+
+ /* At this point the rule wasn't added.
+ * It could be because there was a collision, or some other problem.
+ * At this level of the API, all we know is that the completion
+ * came back with an error status.
+ * Try a rehash by size and insert the rule again - last chance.
+ */
+
+ mutex_unlock(queue_lock);
+
+ hws_bwc_lock_all_queues(ctx);
+ ret = hws_bwc_matcher_rehash_size(bwc_matcher);
+ hws_bwc_unlock_all_queues(ctx);
+
+ if (ret) {
+ mlx5hws_err(ctx, "BWC rule insertion: rehash failed (%d)\n", ret);
+ return ret;
+ }
+
+ /* Rehash done, but we still have that pesky rule to add */
+ mutex_lock(queue_lock);
+
+ ret = hws_bwc_rule_create_sync(bwc_rule,
+ match_param,
+ at_idx,
+ rule_actions,
+ &rule_attr);
+
+ if (unlikely(ret)) {
+ mutex_unlock(queue_lock);
+ mlx5hws_err(ctx, "BWC rule insertion failed (%d)\n", ret);
+ return ret;
+ }
+
+ hws_bwc_rule_list_add(bwc_rule, bwc_queue_idx);
+ mutex_unlock(queue_lock);
+
+ return 0;
+}
+
+struct mlx5hws_bwc_rule *
+mlx5hws_bwc_rule_create(struct mlx5hws_bwc_matcher *bwc_matcher,
+ struct mlx5hws_match_parameters *params,
+ u32 flow_source,
+ struct mlx5hws_rule_action rule_actions[])
+{
+ struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
+ struct mlx5hws_bwc_rule *bwc_rule;
+ u16 bwc_queue_idx;
+ int ret;
+
+ if (unlikely(!mlx5hws_context_bwc_supported(ctx))) {
+ mlx5hws_err(ctx, "BWC rule: Context created w/o BWC API compatibility\n");
+ return NULL;
+ }
+
+ bwc_rule = mlx5hws_bwc_rule_alloc(bwc_matcher);
+ if (unlikely(!bwc_rule))
+ return NULL;
+
+ bwc_queue_idx = hws_bwc_gen_queue_idx(ctx);
+
+ ret = mlx5hws_bwc_rule_create_simple(bwc_rule,
+ params->match_buf,
+ rule_actions,
+ flow_source,
+ bwc_queue_idx);
+ if (unlikely(ret)) {
+ mlx5hws_bwc_rule_free(bwc_rule);
+ return NULL;
+ }
+
+ return bwc_rule;
+}
+
+static int
+hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
+ struct mlx5hws_rule_action rule_actions[])
+{
+ struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
+ struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
+ struct mlx5hws_rule_attr rule_attr;
+ struct mutex *queue_lock; /* Protect the queue */
+ int at_idx, ret;
+ u16 idx;
+
+ idx = bwc_rule->bwc_queue_idx;
+
+ mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, 0, &rule_attr);
+ queue_lock = hws_bwc_get_queue_lock(ctx,
idx);
+
+ mutex_lock(queue_lock);
+
+ /* check if rehash needed due to missing action template */
+ at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
+ if (unlikely(at_idx < 0)) {
+ /* we need to extend BWC matcher action templates array */
+ mutex_unlock(queue_lock);
+ hws_bwc_lock_all_queues(ctx);
+
+ /* check again - perhaps another thread already did extend_at */
+ at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
+ if (likely(at_idx < 0)) {
+ ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions);
+ if (unlikely(ret)) {
+ hws_bwc_unlock_all_queues(ctx);
+ mlx5hws_err(ctx, "BWC rule update: failed extending AT (%d)\n", ret);
+ return -EINVAL;
+ }
+
+ /* action templates array was extended, we need the last idx */
+ at_idx = bwc_matcher->num_of_at - 1;
+
+ ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
+ bwc_matcher->at[at_idx]);
+ if (unlikely(ret)) {
+ /* Action template attach failed, possibly due to
+ * requiring more action STEs.
+ * Need to attempt creating new matcher with all
+ * the action templates, including the new one.
+ */
+ ret = hws_bwc_matcher_rehash_at(bwc_matcher);
+ if (unlikely(ret)) {
+ mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]);
+ bwc_matcher->at[at_idx] = NULL;
+ bwc_matcher->num_of_at--;
+
+ hws_bwc_unlock_all_queues(ctx);
+
+ mlx5hws_err(ctx,
+ "BWC rule update: rehash AT failed (%d)\n",
+ ret);
+ return ret;
+ }
+ }
+ }
+
+ hws_bwc_unlock_all_queues(ctx);
+ mutex_lock(queue_lock);
+ }
+
+ ret = hws_bwc_rule_update_sync(bwc_rule,
+ at_idx,
+ rule_actions,
+ &rule_attr);
+ mutex_unlock(queue_lock);
+
+ if (unlikely(ret))
+ mlx5hws_err(ctx, "BWC rule: update failed (%d)\n", ret);
+
+ return ret;
+}
+
+int mlx5hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
+ struct mlx5hws_rule_action rule_actions[])
+{
+ struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
+ struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
+
+ if (unlikely(!mlx5hws_context_bwc_supported(ctx))) {
+ mlx5hws_err(ctx, "BWC rule: Context created w/o BWC API compatibility\n");
+ return -EINVAL;
+ }
+
+ return hws_bwc_rule_action_update(bwc_rule, rule_actions);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h
new file mode 100644
index 000000000000..4fe8c32d8fbe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#ifndef MLX5HWS_BWC_H_
+#define MLX5HWS_BWC_H_
+
+#define MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG 1
+#define MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP 1
+#define MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH 70
+#define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32
+#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 255
+
+#define MLX5HWS_BWC_MAX_ACTS 16
+
+struct mlx5hws_bwc_matcher {
+ struct mlx5hws_matcher *matcher;
+ struct mlx5hws_match_template *mt;
+ struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM];
+ u8 num_of_at;
+ u16 priority;
+ u8 size_log;
+ u32 num_of_rules; /* atomically accessed */
+ struct list_head *rules;
+};
+
+struct mlx5hws_bwc_rule {
+ struct mlx5hws_bwc_matcher *bwc_matcher;
+ struct mlx5hws_rule *rule;
+ u16 bwc_queue_idx;
+ struct list_head list_node;
+};
+
+int
+mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher,
+ struct mlx5hws_table *table,
+ u32 priority,
+ u8 match_criteria_enable,
+ struct
mlx5hws_match_parameters *mask,
+ enum mlx5hws_action_type action_types[]);
+
+int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher);
+
+struct mlx5hws_bwc_rule *mlx5hws_bwc_rule_alloc(struct mlx5hws_bwc_matcher *bwc_matcher);
+
+void mlx5hws_bwc_rule_free(struct mlx5hws_bwc_rule *bwc_rule);
+
+int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
+ u32 *match_param,
+ struct mlx5hws_rule_action rule_actions[],
+ u32 flow_source,
+ u16 bwc_queue_idx);
+
+int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule);
+
+void mlx5hws_bwc_rule_fill_attr(struct mlx5hws_bwc_matcher *bwc_matcher,
+ u16 bwc_queue_idx,
+ u32 flow_source,
+ struct mlx5hws_rule_attr *rule_attr);
+
+static inline u16 mlx5hws_bwc_queues(struct mlx5hws_context *ctx)
+{
+ /* Besides the control queue, half of the queues are
+ * regular HWS queues, and the other half are BWC queues.
+ */
+ return (ctx->queues - 1) / 2;
+}
+
+static inline u16 mlx5hws_bwc_get_queue_id(struct mlx5hws_context *ctx, u16 idx)
+{
+ return idx + mlx5hws_bwc_queues(ctx);
+}
+
+#endif /* MLX5HWS_BWC_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c
new file mode 100644
index 000000000000..bb563f50ef09
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#include "mlx5hws_internal.h"
+
+bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx,
+ u8 match_criteria_enable,
+ struct mlx5hws_match_parameters *mask)
+{
+ struct mlx5hws_definer match_layout = {0};
+ struct mlx5hws_match_template *mt;
+ bool is_complex = false;
+ int ret;
+
+ if (!match_criteria_enable)
+ return false; /* empty matcher */
+
+ mt = mlx5hws_match_template_create(ctx,
+ mask->match_buf,
+ mask->match_sz,
+ match_criteria_enable);
+ if (!mt) {
+ mlx5hws_err(ctx, "BWC: failed creating match template\n");
+ return false;
+ }
+
+ ret = mlx5hws_definer_calc_layout(ctx, mt, &match_layout);
+ if (ret) {
+ /* The only case that we're interested in is E2BIG,
+ * which means that the match parameters need to be
+ * split into a complex matcher.
+ * For all other cases (good or bad) - just return false
+ * and let the usual match creation path handle it,
+ * both for good and bad flows.
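+ * (A "complex" matcher is one whose match parameters don't fit
+ * into a single definer and must be split across more than one.)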
+ */
+ if (ret == -E2BIG) {
+ is_complex = true;
+ mlx5hws_dbg(ctx, "Matcher definer layout: need complex matcher\n");
+ } else {
+ mlx5hws_err(ctx, "Failed to calculate matcher definer layout\n");
+ }
+ }
+
+ mlx5hws_match_template_destroy(mt);
+
+ return is_complex;
+}
+
+int mlx5hws_bwc_matcher_create_complex(struct mlx5hws_bwc_matcher *bwc_matcher,
+ struct mlx5hws_table *table,
+ u32 priority,
+ u8 match_criteria_enable,
+ struct mlx5hws_match_parameters *mask)
+{
+ mlx5hws_err(table->ctx, "Complex matcher is not supported yet\n");
+ return -EOPNOTSUPP;
+}
+
+void
+mlx5hws_bwc_matcher_destroy_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
+{
+ /* nothing to do here */
+}
+
+int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule,
+ struct mlx5hws_match_parameters *params,
+ u32 flow_source,
+ struct mlx5hws_rule_action rule_actions[],
+ u16 bwc_queue_idx)
+{
+ mlx5hws_err(bwc_rule->bwc_matcher->matcher->tbl->ctx,
+ "Complex rule is not supported yet\n");
+ return -EOPNOTSUPP;
+}
+
+int mlx5hws_bwc_rule_destroy_complex(struct mlx5hws_bwc_rule *bwc_rule)
+{
+ return 0;
+}
+
+int mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
+{
+ mlx5hws_err(bwc_matcher->matcher->tbl->ctx,
+ "Moving complex rule is not supported yet\n");
+ return -EOPNOTSUPP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.h
new file mode 100644
index 000000000000..068ee8118609
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc_complex.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#ifndef MLX5HWS_BWC_COMPLEX_H_
+#define MLX5HWS_BWC_COMPLEX_H_
+
+bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx,
+ u8 match_criteria_enable,
+ struct mlx5hws_match_parameters *mask);
+
+int mlx5hws_bwc_matcher_create_complex(struct mlx5hws_bwc_matcher *bwc_matcher,
+ struct mlx5hws_table *table,
+ u32 priority,
+ u8 match_criteria_enable,
+ struct mlx5hws_match_parameters *mask);
+
+void mlx5hws_bwc_matcher_destroy_complex(struct mlx5hws_bwc_matcher *bwc_matcher);
+
+int mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher);
+
+int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule,
+ struct mlx5hws_match_parameters *params,
+ u32 flow_source,
+ struct mlx5hws_rule_action rule_actions[],
+ u16 bwc_queue_idx);
+
+int mlx5hws_bwc_rule_destroy_complex(struct mlx5hws_bwc_rule *bwc_rule);
+
+#endif /* MLX5HWS_BWC_COMPLEX_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_cmd.c
new file mode 100644
index 000000000000..2c7b14172049
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_cmd.c
@@ -0,0 +1,1300 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#include "mlx5hws_internal.h"
+
+static enum mlx5_ifc_flow_destination_type
+hws_cmd_dest_type_to_ifc_dest_type(enum mlx5_flow_destination_type type)
+{
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ return MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ return MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
+ return MLX5_IFC_FLOW_DESTINATION_TYPE_TIR;
+ case
MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + return MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + return MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK; + case MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE: + return MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE; + case MLX5_FLOW_DESTINATION_TYPE_NONE: + case MLX5_FLOW_DESTINATION_TYPE_PORT: + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + case MLX5_FLOW_DESTINATION_TYPE_RANGE: + default: + pr_warn("HWS: unknown flow dest type %d\n", type); + return 0; + } +}; + +static int hws_cmd_general_obj_destroy(struct mlx5_core_dev *mdev, + u32 object_type, + u32 object_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, object_type); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, object_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5hws_cmd_flow_table_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_ft_create_attr *ft_attr, + u32 *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; + void *ft_ctx; + int ret; + + MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + MLX5_SET(create_flow_table_in, in, table_type, ft_attr->type); + + ft_ctx = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); + MLX5_SET(flow_table_context, ft_ctx, level, ft_attr->level); + MLX5_SET(flow_table_context, ft_ctx, rtc_valid, ft_attr->rtc_valid); + MLX5_SET(flow_table_context, ft_ctx, reformat_en, ft_attr->reformat_en); + MLX5_SET(flow_table_context, ft_ctx, decap_en, ft_attr->decap_en); + + ret = mlx5_cmd_exec_inout(mdev, create_flow_table, in, out); + if (ret) + return ret; + + *table_id = MLX5_GET(create_flow_table_out, out, table_id); + + return 0; +} + +int mlx5hws_cmd_flow_table_modify(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_ft_modify_attr *ft_attr, + u32 table_id) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0}; + void *ft_ctx; + + MLX5_SET(modify_flow_table_in, in, opcode, MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft_attr->type); + MLX5_SET(modify_flow_table_in, in, modify_field_select, ft_attr->modify_fs); + MLX5_SET(modify_flow_table_in, in, table_id, table_id); + + ft_ctx = MLX5_ADDR_OF(modify_flow_table_in, in, flow_table_context); + + MLX5_SET(flow_table_context, ft_ctx, table_miss_action, ft_attr->table_miss_action); + MLX5_SET(flow_table_context, ft_ctx, table_miss_id, ft_attr->table_miss_id); + MLX5_SET(flow_table_context, ft_ctx, hws.rtc_id_0, ft_attr->rtc_id_0); + MLX5_SET(flow_table_context, ft_ctx, hws.rtc_id_1, ft_attr->rtc_id_1); + + return mlx5_cmd_exec_in(mdev, modify_flow_table, in); +} + +int mlx5hws_cmd_flow_table_query(struct mlx5_core_dev *mdev, + u32 table_id, + struct mlx5hws_cmd_ft_query_attr *ft_attr, + u64 *icm_addr_0, u64 *icm_addr_1) +{ + u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {0}; + void *ft_ctx; + int ret; + + MLX5_SET(query_flow_table_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_TABLE); + MLX5_SET(query_flow_table_in, in, table_type, ft_attr->type); + MLX5_SET(query_flow_table_in, in, table_id, table_id); + + ret = mlx5_cmd_exec_inout(mdev, query_flow_table, in, out); + if (ret) + return ret; + + ft_ctx = 
MLX5_ADDR_OF(query_flow_table_out, out, flow_table_context); + *icm_addr_0 = MLX5_GET64(flow_table_context, ft_ctx, sws.sw_owner_icm_root_0); + *icm_addr_1 = MLX5_GET64(flow_table_context, ft_ctx, sws.sw_owner_icm_root_1); + + return ret; +} + +int mlx5hws_cmd_flow_table_destroy(struct mlx5_core_dev *mdev, + u8 fw_ft_type, u32 table_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0}; + + MLX5_SET(destroy_flow_table_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, fw_ft_type); + MLX5_SET(destroy_flow_table_in, in, table_id, table_id); + + return mlx5_cmd_exec_in(mdev, destroy_flow_table, in); +} + +void mlx5hws_cmd_alias_flow_table_destroy(struct mlx5_core_dev *mdev, + u32 table_id) +{ + hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_FT_ALIAS, table_id); +} + +static int hws_cmd_flow_group_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_fg_attr *fg_attr, + u32 *group_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + int ret; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, fg_attr->table_type); + MLX5_SET(create_flow_group_in, in, table_id, fg_attr->table_id); + + ret = mlx5_cmd_exec_inout(mdev, create_flow_group, in, out); + if (ret) + goto out; + + *group_id = MLX5_GET(create_flow_group_out, out, group_id); + +out: + kvfree(in); + return ret; +} + +static int hws_cmd_flow_group_destroy(struct mlx5_core_dev *mdev, + u32 ft_id, u32 fg_id, u8 ft_type) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + + MLX5_SET(destroy_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, ft_type); + MLX5_SET(destroy_flow_group_in, in, table_id, ft_id); + MLX5_SET(destroy_flow_group_in, in, group_id, fg_id); + + return mlx5_cmd_exec_in(mdev, destroy_flow_group, in); +} + +int mlx5hws_cmd_set_fte(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + struct mlx5hws_cmd_set_fte_attr *fte_attr) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + void *in_flow_context; + u32 dest_entry_sz; + u32 total_dest_sz; + u32 action_flags; + u8 *in_dests; + u32 inlen; + u32 *in; + int ret; + u32 i; + + dest_entry_sz = fte_attr->extended_dest ? 
+ MLX5_ST_SZ_BYTES(extended_dest_format) : + MLX5_ST_SZ_BYTES(dest_format); + total_dest_sz = dest_entry_sz * fte_attr->dests_num; + inlen = align((MLX5_ST_SZ_BYTES(set_fte_in) + total_dest_sz), DW_SIZE); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, table_type, table_type); + MLX5_SET(set_fte_in, in, table_id, table_id); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, flow_source, fte_attr->flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, fte_attr->extended_dest); + MLX5_SET(set_fte_in, in, ignore_flow_level, fte_attr->ignore_flow_level); + + action_flags = fte_attr->action_flags; + MLX5_SET(flow_context, in_flow_context, action, action_flags); + + if (action_flags & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + MLX5_SET(flow_context, in_flow_context, + packet_reformat_id, fte_attr->packet_reformat_id); + } + + if (action_flags & (MLX5_FLOW_CONTEXT_ACTION_DECRYPT | MLX5_FLOW_CONTEXT_ACTION_ENCRYPT)) { + MLX5_SET(flow_context, in_flow_context, + encrypt_decrypt_type, fte_attr->encrypt_decrypt_type); + MLX5_SET(flow_context, in_flow_context, + encrypt_decrypt_obj_id, fte_attr->encrypt_decrypt_obj_id); + } + + if (action_flags & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + in_dests = (u8 *)MLX5_ADDR_OF(flow_context, in_flow_context, destination); + + for (i = 0; i < fte_attr->dests_num; i++) { + struct mlx5hws_cmd_set_fte_dest *dest = &fte_attr->dests[i]; + enum mlx5_ifc_flow_destination_type ifc_dest_type = + hws_cmd_dest_type_to_ifc_dest_type(dest->destination_type); + + switch (dest->destination_type) { + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + if (dest->ext_flags & MLX5HWS_CMD_EXT_DEST_ESW_OWNER_VHCA_ID) { + MLX5_SET(dest_format, in_dests, + destination_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(dest_format, in_dests, + destination_eswitch_owner_vhca_id, + dest->esw_owner_vhca_id); + } + fallthrough; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + MLX5_SET(dest_format, in_dests, destination_type, ifc_dest_type); + MLX5_SET(dest_format, in_dests, destination_id, + dest->destination_id); + if (dest->ext_flags & MLX5HWS_CMD_EXT_DEST_REFORMAT) { + MLX5_SET(dest_format, in_dests, packet_reformat, 1); + MLX5_SET(extended_dest_format, in_dests, packet_reformat_id, + dest->ext_reformat_id); + } + break; + default: + ret = -EOPNOTSUPP; + goto out; + } + + in_dests = in_dests + dest_entry_sz; + } + MLX5_SET(flow_context, in_flow_context, destination_list_size, fte_attr->dests_num); + } + + ret = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (ret) + mlx5_core_err(mdev, "Failed creating FLOW_TABLE_ENTRY\n"); + +out: + kfree(in); + return ret; +} + +int mlx5hws_cmd_delete_fte(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, table_type); + MLX5_SET(delete_fte_in, in, table_id, table_id); + + return mlx5_cmd_exec_in(mdev, delete_fte, in); +} + +struct mlx5hws_cmd_forward_tbl * +mlx5hws_cmd_forward_tbl_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_ft_create_attr *ft_attr, + struct mlx5hws_cmd_set_fte_attr *fte_attr) +{ + struct mlx5hws_cmd_fg_attr fg_attr = {0}; + struct mlx5hws_cmd_forward_tbl *tbl; + int 
ret; + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (!tbl) + return NULL; + + ret = mlx5hws_cmd_flow_table_create(mdev, ft_attr, &tbl->ft_id); + if (ret) { + mlx5_core_err(mdev, "Failed to create FT\n"); + goto free_tbl; + } + + fg_attr.table_id = tbl->ft_id; + fg_attr.table_type = ft_attr->type; + + ret = hws_cmd_flow_group_create(mdev, &fg_attr, &tbl->fg_id); + if (ret) { + mlx5_core_err(mdev, "Failed to create FG\n"); + goto free_ft; + } + + ret = mlx5hws_cmd_set_fte(mdev, ft_attr->type, + tbl->ft_id, tbl->fg_id, fte_attr); + if (ret) { + mlx5_core_err(mdev, "Failed to create FTE\n"); + goto free_fg; + } + + tbl->type = ft_attr->type; + return tbl; + +free_fg: + hws_cmd_flow_group_destroy(mdev, tbl->ft_id, tbl->fg_id, ft_attr->type); +free_ft: + mlx5hws_cmd_flow_table_destroy(mdev, ft_attr->type, tbl->ft_id); +free_tbl: + kfree(tbl); + return NULL; +} + +void mlx5hws_cmd_forward_tbl_destroy(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_forward_tbl *tbl) +{ + mlx5hws_cmd_delete_fte(mdev, tbl->type, tbl->ft_id); + hws_cmd_flow_group_destroy(mdev, tbl->ft_id, tbl->fg_id, tbl->type); + mlx5hws_cmd_flow_table_destroy(mdev, tbl->type, tbl->ft_id); + kfree(tbl); +} + +void mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx, + u32 fw_ft_type, + enum mlx5hws_table_type type, + struct mlx5hws_cmd_ft_modify_attr *ft_attr) +{ + u32 default_miss_tbl; + + if (type != MLX5HWS_TABLE_TYPE_FDB) + return; + + ft_attr->modify_fs = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION; + ft_attr->type = fw_ft_type; + ft_attr->table_miss_action = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL; + + default_miss_tbl = ctx->common_res[type].default_miss->ft_id; + if (!default_miss_tbl) { + pr_warn("HWS: no flow table ID for default miss\n"); + return; + } + + ft_attr->table_miss_id = default_miss_tbl; +} + +int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_rtc_create_attr *rtc_attr, + u32 *rtc_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_rtc_in)] = {0}; + void *attr; + int ret; + + attr = MLX5_ADDR_OF(create_rtc_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, + attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, + attr, obj_type, MLX5_OBJ_TYPE_RTC); + + attr = MLX5_ADDR_OF(create_rtc_in, in, rtc); + MLX5_SET(rtc, attr, ste_format_0, rtc_attr->is_frst_jumbo ? 
+ MLX5_IFC_RTC_STE_FORMAT_11DW :
+ MLX5_IFC_RTC_STE_FORMAT_8DW);
+
+ if (rtc_attr->is_scnd_range) {
+ MLX5_SET(rtc, attr, ste_format_1, MLX5_IFC_RTC_STE_FORMAT_RANGE);
+ MLX5_SET(rtc, attr, num_match_ste, 2);
+ }
+
+ MLX5_SET(rtc, attr, pd, rtc_attr->pd);
+ MLX5_SET(rtc, attr, update_method, rtc_attr->fw_gen_wqe);
+ MLX5_SET(rtc, attr, update_index_mode, rtc_attr->update_index_mode);
+ MLX5_SET(rtc, attr, access_index_mode, rtc_attr->access_index_mode);
+ MLX5_SET(rtc, attr, num_hash_definer, rtc_attr->num_hash_definer);
+ MLX5_SET(rtc, attr, log_depth, rtc_attr->log_depth);
+ MLX5_SET(rtc, attr, log_hash_size, rtc_attr->log_size);
+ MLX5_SET(rtc, attr, table_type, rtc_attr->table_type);
+ MLX5_SET(rtc, attr, match_definer_0, rtc_attr->match_definer_0);
+ MLX5_SET(rtc, attr, match_definer_1, rtc_attr->match_definer_1);
+ MLX5_SET(rtc, attr, stc_id, rtc_attr->stc_base);
+ MLX5_SET(rtc, attr, ste_table_base_id, rtc_attr->ste_base);
+ MLX5_SET(rtc, attr, ste_table_offset, rtc_attr->ste_offset);
+ MLX5_SET(rtc, attr, miss_flow_table_id, rtc_attr->miss_ft_id);
+ MLX5_SET(rtc, attr, reparse_mode, rtc_attr->reparse_mode);
+
+ ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to create RTC\n");
+ goto out;
+ }
+
+ *rtc_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+out:
+ return ret;
+}
+
+void mlx5hws_cmd_rtc_destroy(struct mlx5_core_dev *mdev, u32 rtc_id)
+{
+ hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_RTC, rtc_id);
+}
+
+int mlx5hws_cmd_stc_create(struct mlx5_core_dev *mdev,
+ struct mlx5hws_cmd_stc_create_attr *stc_attr,
+ u32 *stc_id)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_stc_in)] = {0};
+ void *attr;
+ int ret;
+
+ attr = MLX5_ADDR_OF(create_stc_in, in, hdr);
+ MLX5_SET(general_obj_in_cmd_hdr,
+ attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr,
+ attr, obj_type, MLX5_OBJ_TYPE_STC);
+ MLX5_SET(general_obj_in_cmd_hdr,
+ attr, op_param.create.log_obj_range, stc_attr->log_obj_range);
+
+ attr = MLX5_ADDR_OF(create_stc_in, in, stc);
+ MLX5_SET(stc, attr, table_type, stc_attr->table_type);
+
+ ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to create STC\n");
+ goto out;
+ }
+
+ *stc_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+out:
+ return ret;
+}
+
+void mlx5hws_cmd_stc_destroy(struct mlx5_core_dev *mdev, u32 stc_id)
+{
+ hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_STC, stc_id);
+}
+
+static int
+hws_cmd_stc_modify_set_stc_param(struct mlx5_core_dev *mdev,
+ struct mlx5hws_cmd_stc_modify_attr *stc_attr,
+ void *stc_param)
+{
+ switch (stc_attr->action_type) {
+ case MLX5_IFC_STC_ACTION_TYPE_COUNTER:
+ MLX5_SET(stc_ste_param_flow_counter, stc_param, flow_counter_id, stc_attr->id);
+ break;
+ case MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_TIR:
+ MLX5_SET(stc_ste_param_tir, stc_param, tirn, stc_attr->dest_tir_num);
+ break;
+ case MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_FT:
+ MLX5_SET(stc_ste_param_table, stc_param, table_id, stc_attr->dest_table_id);
+ break;
+ case MLX5_IFC_STC_ACTION_TYPE_ACC_MODIFY_LIST:
+ MLX5_SET(stc_ste_param_header_modify_list, stc_param,
+ header_modify_pattern_id, stc_attr->modify_header.pattern_id);
+ MLX5_SET(stc_ste_param_header_modify_list, stc_param,
+ header_modify_argument_id, stc_attr->modify_header.arg_id);
+ break;
+ case MLX5_IFC_STC_ACTION_TYPE_HEADER_REMOVE:
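+ /* Remove the header span delimited by the two anchors; when
+ * remove_header.decap is set, HW treats the removal as a decap.
+ * E.g. a plain L2 decap would use a packet-start/inner-MAC anchor
+ * pair with decap = 1 (anchor names here are illustrative).
+ */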
MLX5_SET(stc_ste_param_remove, stc_param, action_type, + MLX5_MODIFICATION_TYPE_REMOVE); + MLX5_SET(stc_ste_param_remove, stc_param, decap, + stc_attr->remove_header.decap); + MLX5_SET(stc_ste_param_remove, stc_param, remove_start_anchor, + stc_attr->remove_header.start_anchor); + MLX5_SET(stc_ste_param_remove, stc_param, remove_end_anchor, + stc_attr->remove_header.end_anchor); + break; + case MLX5_IFC_STC_ACTION_TYPE_HEADER_INSERT: + MLX5_SET(stc_ste_param_insert, stc_param, action_type, + MLX5_MODIFICATION_TYPE_INSERT); + MLX5_SET(stc_ste_param_insert, stc_param, encap, + stc_attr->insert_header.encap); + MLX5_SET(stc_ste_param_insert, stc_param, inline_data, + stc_attr->insert_header.is_inline); + MLX5_SET(stc_ste_param_insert, stc_param, insert_anchor, + stc_attr->insert_header.insert_anchor); + /* HW gets the next 2 sizes in words */ + MLX5_SET(stc_ste_param_insert, stc_param, insert_size, + stc_attr->insert_header.header_size / W_SIZE); + MLX5_SET(stc_ste_param_insert, stc_param, insert_offset, + stc_attr->insert_header.insert_offset / W_SIZE); + MLX5_SET(stc_ste_param_insert, stc_param, insert_argument, + stc_attr->insert_header.arg_id); + break; + case MLX5_IFC_STC_ACTION_TYPE_COPY: + case MLX5_IFC_STC_ACTION_TYPE_SET: + case MLX5_IFC_STC_ACTION_TYPE_ADD: + case MLX5_IFC_STC_ACTION_TYPE_ADD_FIELD: + *(__be64 *)stc_param = stc_attr->modify_action.data; + break; + case MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_VPORT: + case MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_UPLINK: + MLX5_SET(stc_ste_param_vport, stc_param, vport_number, + stc_attr->vport.vport_num); + MLX5_SET(stc_ste_param_vport, stc_param, eswitch_owner_vhca_id, + stc_attr->vport.esw_owner_vhca_id); + MLX5_SET(stc_ste_param_vport, stc_param, eswitch_owner_vhca_id_valid, + stc_attr->vport.eswitch_owner_vhca_id_valid); + break; + case MLX5_IFC_STC_ACTION_TYPE_DROP: + case MLX5_IFC_STC_ACTION_TYPE_NOP: + case MLX5_IFC_STC_ACTION_TYPE_TAG: + case MLX5_IFC_STC_ACTION_TYPE_ALLOW: + break; + case MLX5_IFC_STC_ACTION_TYPE_ASO: + MLX5_SET(stc_ste_param_execute_aso, stc_param, aso_object_id, + stc_attr->aso.devx_obj_id); + MLX5_SET(stc_ste_param_execute_aso, stc_param, return_reg_id, + stc_attr->aso.return_reg_id); + MLX5_SET(stc_ste_param_execute_aso, stc_param, aso_type, + stc_attr->aso.aso_type); + break; + case MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE: + MLX5_SET(stc_ste_param_ste_table, stc_param, ste_obj_id, + stc_attr->ste_table.ste_obj_id); + MLX5_SET(stc_ste_param_ste_table, stc_param, match_definer_id, + stc_attr->ste_table.match_definer_id); + MLX5_SET(stc_ste_param_ste_table, stc_param, log_hash_size, + stc_attr->ste_table.log_hash_size); + break; + case MLX5_IFC_STC_ACTION_TYPE_REMOVE_WORDS: + MLX5_SET(stc_ste_param_remove_words, stc_param, action_type, + MLX5_MODIFICATION_TYPE_REMOVE_WORDS); + MLX5_SET(stc_ste_param_remove_words, stc_param, remove_start_anchor, + stc_attr->remove_words.start_anchor); + MLX5_SET(stc_ste_param_remove_words, stc_param, + remove_size, stc_attr->remove_words.num_of_words); + break; + case MLX5_IFC_STC_ACTION_TYPE_CRYPTO_IPSEC_ENCRYPTION: + MLX5_SET(stc_ste_param_ipsec_encrypt, stc_param, ipsec_object_id, + stc_attr->id); + break; + case MLX5_IFC_STC_ACTION_TYPE_CRYPTO_IPSEC_DECRYPTION: + MLX5_SET(stc_ste_param_ipsec_decrypt, stc_param, ipsec_object_id, + stc_attr->id); + break; + case MLX5_IFC_STC_ACTION_TYPE_TRAILER: + MLX5_SET(stc_ste_param_trailer, stc_param, command, + stc_attr->reformat_trailer.op); + MLX5_SET(stc_ste_param_trailer, stc_param, type, + stc_attr->reformat_trailer.type); + 
MLX5_SET(stc_ste_param_trailer, stc_param, length, + stc_attr->reformat_trailer.size); + break; + default: + mlx5_core_err(mdev, "Not supported type %d\n", stc_attr->action_type); + return -EINVAL; + } + return 0; +} + +int mlx5hws_cmd_stc_modify(struct mlx5_core_dev *mdev, + u32 stc_id, + struct mlx5hws_cmd_stc_modify_attr *stc_attr) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_stc_in)] = {0}; + void *stc_param; + void *attr; + int ret; + + attr = MLX5_ADDR_OF(create_stc_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, + attr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, + attr, obj_type, MLX5_OBJ_TYPE_STC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, stc_id); + MLX5_SET(general_obj_in_cmd_hdr, in, + op_param.query.obj_offset, stc_attr->stc_offset); + + attr = MLX5_ADDR_OF(create_stc_in, in, stc); + MLX5_SET(stc, attr, ste_action_offset, stc_attr->action_offset); + MLX5_SET(stc, attr, action_type, stc_attr->action_type); + MLX5_SET(stc, attr, reparse_mode, stc_attr->reparse_mode); + MLX5_SET64(stc, attr, modify_field_select, + MLX5_IFC_MODIFY_STC_FIELD_SELECT_NEW_STC); + + /* Set destination TIRN, TAG, FT ID, STE ID */ + stc_param = MLX5_ADDR_OF(stc, attr, stc_param); + ret = hws_cmd_stc_modify_set_stc_param(mdev, stc_attr, stc_param); + if (ret) + return ret; + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) + mlx5_core_err(mdev, "Failed to modify STC FW action_type %d\n", + stc_attr->action_type); + + return ret; +} + +int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev, + u16 log_obj_range, + u32 pd, + u32 *arg_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_arg_in)] = {0}; + void *attr; + int ret; + + attr = MLX5_ADDR_OF(create_arg_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, + attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, + attr, obj_type, MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT); + MLX5_SET(general_obj_in_cmd_hdr, + attr, op_param.create.log_obj_range, log_obj_range); + + attr = MLX5_ADDR_OF(create_arg_in, in, arg); + MLX5_SET(arg, attr, access_pd, pd); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) { + mlx5_core_err(mdev, "Failed to create ARG\n"); + goto out; + } + + *arg_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); +out: + return ret; +} + +void mlx5hws_cmd_arg_destroy(struct mlx5_core_dev *mdev, + u32 arg_id) +{ + hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT, arg_id); +} + +int mlx5hws_cmd_header_modify_pattern_create(struct mlx5_core_dev *mdev, + u32 pattern_length, + u8 *actions, + u32 *ptrn_id) +{ + u32 in[MLX5_ST_SZ_DW(create_header_modify_pattern_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + int num_of_actions; + u64 *pattern_data; + void *pattern; + void *attr; + int ret; + int i; + + if (pattern_length > MLX5_MAX_ACTIONS_DATA_IN_HEADER_MODIFY) { + mlx5_core_err(mdev, "Pattern length %d exceeds limit %d\n", + pattern_length, MLX5_MAX_ACTIONS_DATA_IN_HEADER_MODIFY); + return -EINVAL; + } + + attr = MLX5_ADDR_OF(create_header_modify_pattern_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, + attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, + attr, obj_type, MLX5_OBJ_TYPE_MODIFY_HDR_PATTERN); + + pattern = MLX5_ADDR_OF(create_header_modify_pattern_in, in, pattern); + /* Pattern_length is in ddwords */ + MLX5_SET(header_modify_pattern_in, pattern, pattern_length, 
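+ /* e.g. a 4-action pattern is 4 * MLX5HWS_MODIFY_ACTION_SIZE = 32
+ * bytes, i.e. 32 / (2 * DW_SIZE) = 4 ddwords, assuming the usual
+ * 4-byte DW_SIZE
+ */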
pattern_length / (2 * DW_SIZE));
+
+ pattern_data = (u64 *)MLX5_ADDR_OF(header_modify_pattern_in, pattern, pattern_data);
+ memcpy(pattern_data, actions, pattern_length);
+
+ num_of_actions = pattern_length / MLX5HWS_MODIFY_ACTION_SIZE;
+ for (i = 0; i < num_of_actions; i++) {
+ int type;
+
+ type = MLX5_GET(set_action_in, &pattern_data[i], action_type);
+ if (type != MLX5_MODIFICATION_TYPE_COPY &&
+ type != MLX5_MODIFICATION_TYPE_ADD_FIELD)
+ /* Copy-type actions use all the bytes for control */
+ MLX5_SET(set_action_in, &pattern_data[i], data, 0);
+ }
+
+ ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to create header_modify_pattern\n");
+ goto out;
+ }
+
+ *ptrn_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+out:
+ return ret;
+}
+
+void mlx5hws_cmd_header_modify_pattern_destroy(struct mlx5_core_dev *mdev,
+ u32 ptrn_id)
+{
+ hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_MODIFY_HDR_PATTERN, ptrn_id);
+}
+
+int mlx5hws_cmd_ste_create(struct mlx5_core_dev *mdev,
+ struct mlx5hws_cmd_ste_create_attr *ste_attr,
+ u32 *ste_id)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_ste_in)] = {0};
+ void *attr;
+ int ret;
+
+ attr = MLX5_ADDR_OF(create_ste_in, in, hdr);
+ MLX5_SET(general_obj_in_cmd_hdr,
+ attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr,
+ attr, obj_type, MLX5_OBJ_TYPE_STE);
+ MLX5_SET(general_obj_in_cmd_hdr,
+ attr, op_param.create.log_obj_range, ste_attr->log_obj_range);
+
+ attr = MLX5_ADDR_OF(create_ste_in, in, ste);
+ MLX5_SET(ste, attr, table_type, ste_attr->table_type);
+
+ ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to create STE\n");
+ goto out;
+ }
+
+ *ste_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+out:
+ return ret;
+}
+
+void mlx5hws_cmd_ste_destroy(struct mlx5_core_dev *mdev, u32 ste_id)
+{
+ hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_STE, ste_id);
+}
+
+int mlx5hws_cmd_definer_create(struct mlx5_core_dev *mdev,
+ struct mlx5hws_cmd_definer_create_attr *def_attr,
+ u32 *definer_id)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_definer_in)] = {0};
+ void *ptr;
+ int ret;
+
+ MLX5_SET(general_obj_in_cmd_hdr,
+ in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr,
+ in, obj_type, MLX5_OBJ_TYPE_MATCH_DEFINER);
+
+ ptr = MLX5_ADDR_OF(create_definer_in, in, definer);
+ MLX5_SET(definer, ptr, format_id, MLX5_IFC_DEFINER_FORMAT_ID_SELECT);
+
+ MLX5_SET(definer, ptr, format_select_dw0, def_attr->dw_selector[0]);
+ MLX5_SET(definer, ptr, format_select_dw1, def_attr->dw_selector[1]);
+ MLX5_SET(definer, ptr, format_select_dw2, def_attr->dw_selector[2]);
+ MLX5_SET(definer, ptr, format_select_dw3, def_attr->dw_selector[3]);
+ MLX5_SET(definer, ptr, format_select_dw4, def_attr->dw_selector[4]);
+ MLX5_SET(definer, ptr, format_select_dw5, def_attr->dw_selector[5]);
+ MLX5_SET(definer, ptr, format_select_dw6, def_attr->dw_selector[6]);
+ MLX5_SET(definer, ptr, format_select_dw7, def_attr->dw_selector[7]);
+ MLX5_SET(definer, ptr, format_select_dw8, def_attr->dw_selector[8]);
+
+ MLX5_SET(definer, ptr, format_select_byte0, def_attr->byte_selector[0]);
+ MLX5_SET(definer, ptr, format_select_byte1, def_attr->byte_selector[1]);
+ MLX5_SET(definer, ptr, format_select_byte2, def_attr->byte_selector[2]);
+ MLX5_SET(definer, ptr, format_select_byte3, def_attr->byte_selector[3]);
+ MLX5_SET(definer, ptr,
format_select_byte4, def_attr->byte_selector[4]); + MLX5_SET(definer, ptr, format_select_byte5, def_attr->byte_selector[5]); + MLX5_SET(definer, ptr, format_select_byte6, def_attr->byte_selector[6]); + MLX5_SET(definer, ptr, format_select_byte7, def_attr->byte_selector[7]); + + ptr = MLX5_ADDR_OF(definer, ptr, match_mask); + memcpy(ptr, def_attr->match_mask, MLX5_FLD_SZ_BYTES(definer, match_mask)); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) { + mlx5_core_err(mdev, "Failed to create Definer\n"); + goto out; + } + + *definer_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); +out: + return ret; +} + +void mlx5hws_cmd_definer_destroy(struct mlx5_core_dev *mdev, + u32 definer_id) +{ + hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_MATCH_DEFINER, definer_id); +} + +int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_packet_reformat_create_attr *attr, + u32 *reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_out)] = {0}; + size_t insz, cmd_data_sz, cmd_total_sz; + void *prctx; + void *pdata; + void *in; + int ret; + + cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in); + cmd_total_sz += MLX5_ST_SZ_BYTES(packet_reformat_context_in); + cmd_data_sz = MLX5_FLD_SZ_BYTES(packet_reformat_context_in, reformat_data); + insz = align(cmd_total_sz + attr->data_sz - cmd_data_sz, DW_SIZE); + in = kzalloc(insz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + + prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, + packet_reformat_context); + pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data); + + MLX5_SET(packet_reformat_context_in, prctx, reformat_type, attr->type); + MLX5_SET(packet_reformat_context_in, prctx, reformat_param_0, attr->reformat_param_0); + MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, attr->data_sz); + memcpy(pdata, attr->data, attr->data_sz); + + ret = mlx5_cmd_exec(mdev, in, insz, out, sizeof(out)); + if (ret) { + mlx5_core_err(mdev, "Failed to create packet reformat\n"); + goto out; + } + + *reformat_id = MLX5_GET(alloc_packet_reformat_out, out, packet_reformat_id); +out: + kfree(in); + return ret; +} + +int mlx5hws_cmd_packet_reformat_destroy(struct mlx5_core_dev *mdev, + u32 reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_in)] = {0}; + int ret; + + MLX5_SET(dealloc_packet_reformat_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_in, in, + packet_reformat_id, reformat_id); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) + mlx5_core_err(mdev, "Failed to destroy packet_reformat\n"); + + return ret; +} + +int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn) +{ + u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(modify_sq_in)] = {0}; + void *sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + int ret; + + MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); + MLX5_SET(modify_sq_in, in, sqn, sqn); + MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) + mlx5_core_err(mdev, "Failed to modify SQ\n"); + + return ret; +} + +int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_allow_other_vhca_access_attr *attr) +{ + 
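+ /* Usage sketch (illustrative; owner_mdev, peer_mdev, owner_vhca_id,
+ * key and rtc_id are assumed caller-side variables): the object
+ * owner first grants access under a shared key, then the peer
+ * function creates an alias object with the same key:
+ *
+ * struct mlx5hws_cmd_allow_other_vhca_access_attr access = {
+ * .obj_type = MLX5_OBJ_TYPE_RTC,
+ * .obj_id = rtc_id,
+ * };
+ * struct mlx5hws_cmd_alias_obj_create_attr alias = {
+ * .obj_type = MLX5_OBJ_TYPE_RTC,
+ * .obj_id = rtc_id,
+ * .vhca_id = owner_vhca_id,
+ * };
+ * u32 alias_id;
+ *
+ * memcpy(access.access_key, key, ACCESS_KEY_LEN);
+ * mlx5hws_cmd_allow_other_vhca_access(owner_mdev, &access);
+ *
+ * memcpy(alias.access_key, key, ACCESS_KEY_LEN);
+ * mlx5hws_cmd_alias_obj_create(peer_mdev, &alias, &alias_id);
+ */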
u32 out[MLX5_ST_SZ_DW(allow_other_vhca_access_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(allow_other_vhca_access_in)] = {0}; + void *key; + int ret; + + MLX5_SET(allow_other_vhca_access_in, + in, opcode, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS); + MLX5_SET(allow_other_vhca_access_in, + in, object_type_to_be_accessed, attr->obj_type); + MLX5_SET(allow_other_vhca_access_in, + in, object_id_to_be_accessed, attr->obj_id); + + key = MLX5_ADDR_OF(allow_other_vhca_access_in, in, access_key); + memcpy(key, attr->access_key, sizeof(attr->access_key)); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) + mlx5_core_err(mdev, "Failed to execute ALLOW_OTHER_VHCA_ACCESS command\n"); + + return ret; +} + +int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_alias_obj_create_attr *alias_attr, + u32 *obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_alias_obj_in)] = {0}; + void *attr; + void *key; + int ret; + + attr = MLX5_ADDR_OF(create_alias_obj_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, + attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, + attr, obj_type, alias_attr->obj_type); + MLX5_SET(general_obj_in_cmd_hdr, attr, op_param.create.alias_object, 1); + + attr = MLX5_ADDR_OF(create_alias_obj_in, in, alias_ctx); + MLX5_SET(alias_context, attr, vhca_id_to_be_accessed, alias_attr->vhca_id); + MLX5_SET(alias_context, attr, object_id_to_be_accessed, alias_attr->obj_id); + + key = MLX5_ADDR_OF(alias_context, attr, access_key); + memcpy(key, alias_attr->access_key, sizeof(alias_attr->access_key)); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) { + mlx5_core_err(mdev, "Failed to create ALIAS OBJ\n"); + goto out; + } + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); +out: + return ret; +} + +int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev, + u16 obj_type, + u32 obj_id) +{ + return hws_cmd_general_obj_destroy(mdev, obj_type, obj_id); +} + +int mlx5hws_cmd_generate_wqe(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_generate_wqe_attr *attr, + struct mlx5_cqe64 *ret_cqe) +{ + u32 out[MLX5_ST_SZ_DW(generate_wqe_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(generate_wqe_in)] = {0}; + u8 status; + void *ptr; + int ret; + + MLX5_SET(generate_wqe_in, in, opcode, MLX5_CMD_OP_GENERATE_WQE); + MLX5_SET(generate_wqe_in, in, pdn, attr->pdn); + + ptr = MLX5_ADDR_OF(generate_wqe_in, in, wqe_ctrl); + memcpy(ptr, attr->wqe_ctrl, MLX5_FLD_SZ_BYTES(generate_wqe_in, wqe_ctrl)); + + ptr = MLX5_ADDR_OF(generate_wqe_in, in, wqe_gta_ctrl); + memcpy(ptr, attr->gta_ctrl, MLX5_FLD_SZ_BYTES(generate_wqe_in, wqe_gta_ctrl)); + + ptr = MLX5_ADDR_OF(generate_wqe_in, in, wqe_gta_data_0); + memcpy(ptr, attr->gta_data_0, MLX5_FLD_SZ_BYTES(generate_wqe_in, wqe_gta_data_0)); + + if (attr->gta_data_1) { + ptr = MLX5_ADDR_OF(generate_wqe_in, in, wqe_gta_data_1); + memcpy(ptr, attr->gta_data_1, MLX5_FLD_SZ_BYTES(generate_wqe_in, wqe_gta_data_1)); + } + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (ret) { + mlx5_core_err(mdev, "Failed to write GTA WQE using FW\n"); + return ret; + } + + status = MLX5_GET(generate_wqe_out, out, status); + if (status) { + mlx5_core_err(mdev, "Invalid FW CQE status %d\n", status); + return -EINVAL; + } + + ptr = MLX5_ADDR_OF(generate_wqe_out, out, cqe_data); + memcpy(ret_cqe, ptr, sizeof(*ret_cqe)); + + return ret; +} + +int mlx5hws_cmd_query_caps(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_query_caps *caps) +{ + u32 
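+ /* The same in/out buffers are reused for a series of QUERY_HCA_CAP
+ * calls; only op_mod changes below to select which capability page
+ * is read: general device, general device 2, NIC flow table,
+ * WQE-based flow table and eswitch caps.
+ */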
in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {0}; + u32 out_size; + u32 *out; + int ret; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE | HCA_CAP_OPMOD_GET_CUR); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (ret) { + mlx5_core_err(mdev, "Failed to query device caps\n"); + goto out; + } + + caps->wqe_based_update = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.wqe_based_flow_table_update_cap); + + caps->eswitch_manager = MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.eswitch_manager); + + caps->flex_protocols = MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.flex_parser_protocols); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED) + caps->flex_parser_id_geneve_tlv_option_0 = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.flex_parser_id_geneve_tlv_option_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED) + caps->flex_parser_id_mpls_over_gre = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.flex_parser_id_outer_first_mpls_over_gre); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) + caps->flex_parser_id_mpls_over_udp = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.flex_parser_id_outer_first_mpls_over_udp_label); + + caps->log_header_modify_argument_granularity = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.log_header_modify_argument_granularity); + + caps->log_header_modify_argument_granularity -= + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.log_header_modify_argument_granularity_offset); + + caps->log_header_modify_argument_max_alloc = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.log_header_modify_argument_max_alloc); + + caps->definer_format_sup = + MLX5_GET64(query_hca_cap_out, out, + capability.cmd_hca_cap.match_definer_format_supported); + + caps->vhca_id = MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.vhca_id); + + caps->sq_ts_format = MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.sq_ts_format); + + caps->ipsec_offload = MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.ipsec_offload); + + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 | HCA_CAP_OPMOD_GET_CUR); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (ret) { + mlx5_core_err(mdev, "Failed to query device caps 2\n"); + goto out; + } + + caps->full_dw_jumbo_support = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.format_select_dw_8_6_ext); + + caps->format_select_gtpu_dw_0 = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.format_select_dw_gtpu_dw_0); + + caps->format_select_gtpu_dw_1 = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.format_select_dw_gtpu_dw_1); + + caps->format_select_gtpu_dw_2 = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.format_select_dw_gtpu_dw_2); + + caps->format_select_gtpu_ext_dw_0 = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.format_select_dw_gtpu_first_ext_dw_0); + + caps->supp_type_gen_wqe = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.generate_wqe_type); + + caps->flow_table_hash_type = + MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap_2.flow_table_hash_type); + + MLX5_SET(query_hca_cap_in, 
in, op_mod, + MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE | HCA_CAP_OPMOD_GET_CUR); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (ret) { + mlx5_core_err(mdev, "Failed to query flow table caps\n"); + goto out; + } + + caps->nic_ft.max_level = + MLX5_GET(query_hca_cap_out, out, + capability.flow_table_nic_cap.flow_table_properties_nic_receive.max_ft_level); + + caps->nic_ft.reparse = + MLX5_GET(query_hca_cap_out, out, + capability.flow_table_nic_cap.flow_table_properties_nic_receive.reparse); + + caps->nic_ft.ignore_flow_level_rtc_valid = + MLX5_GET(query_hca_cap_out, out, + capability.flow_table_nic_cap.flow_table_properties_nic_receive.ignore_flow_level_rtc_valid); + + caps->flex_parser_ok_bits_supp = + MLX5_GET(query_hca_cap_out, out, + capability.flow_table_nic_cap.flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist); + + if (caps->wqe_based_update) { + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_GET_HCA_CAP_OP_MOD_WQE_BASED_FLOW_TABLE | HCA_CAP_OPMOD_GET_CUR); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (ret) { + mlx5_core_err(mdev, "Failed to query WQE based FT caps\n"); + goto out; + } + + caps->rtc_reparse_mode = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.rtc_reparse_mode); + + caps->ste_format = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.ste_format); + + caps->rtc_index_mode = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.rtc_index_mode); + + caps->rtc_log_depth_max = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.rtc_log_depth_max); + + caps->ste_alloc_log_max = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.ste_alloc_log_max); + + caps->ste_alloc_log_gran = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.ste_alloc_log_granularity); + + caps->trivial_match_definer = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.trivial_match_definer); + + caps->stc_alloc_log_max = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.stc_alloc_log_max); + + caps->stc_alloc_log_gran = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.stc_alloc_log_granularity); + + caps->rtc_hash_split_table = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.rtc_hash_split_table); + + caps->rtc_linear_lookup_table = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.rtc_linear_lookup_table); + + caps->access_index_mode = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.access_index_mode); + + caps->linear_match_definer = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.linear_match_definer_reg_c3); + + caps->rtc_max_hash_def_gen_wqe = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.rtc_max_num_hash_definer_gen_wqe); + + caps->supp_ste_format_gen_wqe = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.ste_format_gen_wqe); + + caps->fdb_tir_stc = + MLX5_GET(query_hca_cap_out, out, + capability.wqe_based_flow_table_cap.fdb_jump_to_tir_stc); + } + + if (caps->eswitch_manager) { + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_GET_HCA_CAP_OP_MOD_ESW_FLOW_TABLE | HCA_CAP_OPMOD_GET_CUR); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (ret) { + mlx5_core_err(mdev, "Failed to query flow table esw caps\n"); + goto out; + } + + caps->fdb_ft.max_level = + 
MLX5_GET(query_hca_cap_out, out, + capability.flow_table_nic_cap.flow_table_properties_nic_receive.max_ft_level); + + caps->fdb_ft.reparse = + MLX5_GET(query_hca_cap_out, out, + capability.flow_table_nic_cap.flow_table_properties_nic_receive.reparse); + + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_ESW | HCA_CAP_OPMOD_GET_CUR); + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (ret) { + mlx5_core_err(mdev, "Failed to query eswitch capabilities\n"); + goto out; + } + + if (MLX5_GET(query_hca_cap_out, out, + capability.esw_cap.esw_manager_vport_number_valid)) + caps->eswitch_manager_vport_number = + MLX5_GET(query_hca_cap_out, out, + capability.esw_cap.esw_manager_vport_number); + + caps->merged_eswitch = MLX5_GET(query_hca_cap_out, out, + capability.esw_cap.merged_eswitch); + } + + ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (ret) { + mlx5_core_err(mdev, "Failed to query device attributes\n"); + goto out; + } + + snprintf(caps->fw_ver, sizeof(caps->fw_ver), "%d.%d.%d", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); + + caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev); + +out: + kfree(out); + return ret; +} + +int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function, + u16 vport_number, u16 *gvmi) +{ + bool ec_vf_func = other_function ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + int out_size; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, other_function, other_function); + MLX5_SET(query_hca_cap_in, in, function_id, + mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR); + + err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + if (err) { + kfree(out); + return err; + } + + *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); + + kfree(out); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_cmd.h new file mode 100644 index 000000000000..2fbcf4ff571a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_cmd.h @@ -0,0 +1,361 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_CMD_H_ +#define MLX5HWS_CMD_H_ + +#define WIRE_PORT 0xFFFF + +#define ACCESS_KEY_LEN 32 + +enum mlx5hws_cmd_ext_dest_flags { + MLX5HWS_CMD_EXT_DEST_REFORMAT = 1 << 0, + MLX5HWS_CMD_EXT_DEST_ESW_OWNER_VHCA_ID = 1 << 1, +}; + +struct mlx5hws_cmd_set_fte_dest { + u8 destination_type; + u32 destination_id; + enum mlx5hws_cmd_ext_dest_flags ext_flags; + u32 ext_reformat_id; + u16 esw_owner_vhca_id; +}; + +struct mlx5hws_cmd_set_fte_attr { + u32 action_flags; + bool ignore_flow_level; + u8 flow_source; + u8 extended_dest; + u8 encrypt_decrypt_type; + u32 encrypt_decrypt_obj_id; + u32 packet_reformat_id; + u32 dests_num; + struct mlx5hws_cmd_set_fte_dest *dests; +}; + +struct mlx5hws_cmd_ft_create_attr { + u8 type; + u8 level; + bool rtc_valid; + bool decap_en; + bool reformat_en; +}; + +struct mlx5hws_cmd_ft_modify_attr { + u8 type; + u32 rtc_id_0; + u32 rtc_id_1; + 
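+ /* Miss handling; used together with modify_fs =
+ * MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION, see
+ * mlx5hws_cmd_set_attr_connect_miss_tbl():
+ */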
u32 table_miss_id; + u8 table_miss_action; + u64 modify_fs; +}; + +struct mlx5hws_cmd_ft_query_attr { + u8 type; +}; + +struct mlx5hws_cmd_fg_attr { + u32 table_id; + u32 table_type; +}; + +struct mlx5hws_cmd_forward_tbl { + u8 type; + u32 ft_id; + u32 fg_id; + u32 refcount; +}; + +struct mlx5hws_cmd_rtc_create_attr { + u32 pd; + u32 stc_base; + u32 ste_base; + u32 ste_offset; + u32 miss_ft_id; + bool fw_gen_wqe; + u8 update_index_mode; + u8 access_index_mode; + u8 num_hash_definer; + u8 log_depth; + u8 log_size; + u8 table_type; + u8 match_definer_0; + u8 match_definer_1; + u8 reparse_mode; + bool is_frst_jumbo; + bool is_scnd_range; +}; + +struct mlx5hws_cmd_alias_obj_create_attr { + u32 obj_id; + u16 vhca_id; + u16 obj_type; + u8 access_key[ACCESS_KEY_LEN]; +}; + +struct mlx5hws_cmd_stc_create_attr { + u8 log_obj_range; + u8 table_type; +}; + +struct mlx5hws_cmd_stc_modify_attr { + u32 stc_offset; + u8 action_offset; + u8 reparse_mode; + enum mlx5_ifc_stc_action_type action_type; + union { + u32 id; /* TIRN, TAG, FT ID, STE ID, CRYPTO */ + struct { + u8 decap; + u16 start_anchor; + u16 end_anchor; + } remove_header; + struct { + u32 arg_id; + u32 pattern_id; + } modify_header; + struct { + __be64 data; + } modify_action; + struct { + u32 arg_id; + u32 header_size; + u8 is_inline; + u8 encap; + u16 insert_anchor; + u16 insert_offset; + } insert_header; + struct { + u8 aso_type; + u32 devx_obj_id; + u8 return_reg_id; + } aso; + struct { + u16 vport_num; + u16 esw_owner_vhca_id; + u8 eswitch_owner_vhca_id_valid; + } vport; + struct { + struct mlx5hws_pool_chunk ste; + struct mlx5hws_pool *ste_pool; + u32 ste_obj_id; /* Internal */ + u32 match_definer_id; + u8 log_hash_size; + bool ignore_tx; + } ste_table; + struct { + u16 start_anchor; + u16 num_of_words; + } remove_words; + struct { + u8 type; + u8 op; + u8 size; + } reformat_trailer; + + u32 dest_table_id; + u32 dest_tir_num; + }; +}; + +struct mlx5hws_cmd_ste_create_attr { + u8 log_obj_range; + u8 table_type; +}; + +struct mlx5hws_cmd_definer_create_attr { + u8 *dw_selector; + u8 *byte_selector; + u8 *match_mask; +}; + +struct mlx5hws_cmd_allow_other_vhca_access_attr { + u16 obj_type; + u32 obj_id; + u8 access_key[ACCESS_KEY_LEN]; +}; + +struct mlx5hws_cmd_packet_reformat_create_attr { + u8 type; + size_t data_sz; + void *data; + u8 reformat_param_0; +}; + +struct mlx5hws_cmd_query_ft_caps { + u8 max_level; + u8 reparse; + u8 ignore_flow_level_rtc_valid; +}; + +struct mlx5hws_cmd_generate_wqe_attr { + u8 *wqe_ctrl; + u8 *gta_ctrl; + u8 *gta_data_0; + u8 *gta_data_1; + u32 pdn; +}; + +struct mlx5hws_cmd_query_caps { + u32 flex_protocols; + u8 wqe_based_update; + u8 rtc_reparse_mode; + u16 ste_format; + u8 rtc_index_mode; + u8 ste_alloc_log_max; + u8 ste_alloc_log_gran; + u8 stc_alloc_log_max; + u8 stc_alloc_log_gran; + u8 rtc_log_depth_max; + u8 format_select_gtpu_dw_0; + u8 format_select_gtpu_dw_1; + u8 flow_table_hash_type; + u8 format_select_gtpu_dw_2; + u8 format_select_gtpu_ext_dw_0; + u8 access_index_mode; + u32 linear_match_definer; + bool full_dw_jumbo_support; + bool rtc_hash_split_table; + bool rtc_linear_lookup_table; + u32 supp_type_gen_wqe; + u8 rtc_max_hash_def_gen_wqe; + u16 supp_ste_format_gen_wqe; + struct mlx5hws_cmd_query_ft_caps nic_ft; + struct mlx5hws_cmd_query_ft_caps fdb_ft; + bool eswitch_manager; + bool merged_eswitch; + u32 eswitch_manager_vport_number; + u8 log_header_modify_argument_granularity; + u8 log_header_modify_argument_max_alloc; + u8 sq_ts_format; + u8 fdb_tir_stc; + u64 definer_format_sup; + u32 
trivial_match_definer; + u32 vhca_id; + u32 shared_vhca_id; + char fw_ver[64]; + bool ipsec_offload; + bool is_ecpf; + u8 flex_parser_ok_bits_supp; + u8 flex_parser_id_geneve_tlv_option_0; + u8 flex_parser_id_mpls_over_gre; + u8 flex_parser_id_mpls_over_udp; +}; + +int mlx5hws_cmd_flow_table_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_ft_create_attr *ft_attr, + u32 *table_id); + +int mlx5hws_cmd_flow_table_modify(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_ft_modify_attr *ft_attr, + u32 table_id); + +int mlx5hws_cmd_flow_table_query(struct mlx5_core_dev *mdev, + u32 obj_id, + struct mlx5hws_cmd_ft_query_attr *ft_attr, + u64 *icm_addr_0, u64 *icm_addr_1); + +int mlx5hws_cmd_flow_table_destroy(struct mlx5_core_dev *mdev, + u8 fw_ft_type, u32 table_id); + +void mlx5hws_cmd_alias_flow_table_destroy(struct mlx5_core_dev *mdev, + u32 table_id); + +int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_rtc_create_attr *rtc_attr, + u32 *rtc_id); + +void mlx5hws_cmd_rtc_destroy(struct mlx5_core_dev *mdev, u32 rtc_id); + +int mlx5hws_cmd_stc_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_stc_create_attr *stc_attr, + u32 *stc_id); + +int mlx5hws_cmd_stc_modify(struct mlx5_core_dev *mdev, + u32 stc_id, + struct mlx5hws_cmd_stc_modify_attr *stc_attr); + +void mlx5hws_cmd_stc_destroy(struct mlx5_core_dev *mdev, u32 stc_id); + +int mlx5hws_cmd_generate_wqe(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_generate_wqe_attr *attr, + struct mlx5_cqe64 *ret_cqe); + +int mlx5hws_cmd_ste_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_ste_create_attr *ste_attr, + u32 *ste_id); + +void mlx5hws_cmd_ste_destroy(struct mlx5_core_dev *mdev, u32 ste_id); + +int mlx5hws_cmd_definer_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_definer_create_attr *def_attr, + u32 *definer_id); + +void mlx5hws_cmd_definer_destroy(struct mlx5_core_dev *mdev, + u32 definer_id); + +int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev, + u16 log_obj_range, + u32 pd, + u32 *arg_id); + +void mlx5hws_cmd_arg_destroy(struct mlx5_core_dev *mdev, + u32 arg_id); + +int mlx5hws_cmd_header_modify_pattern_create(struct mlx5_core_dev *mdev, + u32 pattern_length, + u8 *actions, + u32 *ptrn_id); + +void mlx5hws_cmd_header_modify_pattern_destroy(struct mlx5_core_dev *mdev, + u32 ptrn_id); + +int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_packet_reformat_create_attr *attr, + u32 *reformat_id); + +int mlx5hws_cmd_packet_reformat_destroy(struct mlx5_core_dev *mdev, + u32 reformat_id); + +int mlx5hws_cmd_set_fte(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + struct mlx5hws_cmd_set_fte_attr *fte_attr); + +int mlx5hws_cmd_delete_fte(struct mlx5_core_dev *mdev, + u32 table_type, u32 table_id); + +struct mlx5hws_cmd_forward_tbl * +mlx5hws_cmd_forward_tbl_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_ft_create_attr *ft_attr, + struct mlx5hws_cmd_set_fte_attr *fte_attr); + +void mlx5hws_cmd_forward_tbl_destroy(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_forward_tbl *tbl); + +int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_alias_obj_create_attr *alias_attr, + u32 *obj_id); + +int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev, + u16 obj_type, + u32 obj_id); + +int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn); + +int mlx5hws_cmd_query_caps(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_query_caps *caps); + +void 
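
Most wrappers declared in this header come in create/destroy pairs that return a firmware object ID through an out-parameter. A rough caller-side sketch of the flow-table pair, using only the attr fields defined above (the function name and the level value are illustrative):

static int example_ft_lifecycle(struct mlx5_core_dev *mdev, u8 fw_ft_type)
{
	struct mlx5hws_cmd_ft_create_attr ft_attr = {
		.type = fw_ft_type,
		.level = 1,		/* illustrative level */
		.rtc_valid = true,	/* table will be driven by RTCs */
	};
	u32 table_id;
	int ret;

	ret = mlx5hws_cmd_flow_table_create(mdev, &ft_attr, &table_id);
	if (ret)
		return ret;

	/* ... attach RTCs with mlx5hws_cmd_flow_table_modify() ... */

	return mlx5hws_cmd_flow_table_destroy(mdev, fw_ft_type, table_id);
}
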
mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx, + u32 fw_ft_type, + enum mlx5hws_table_type type, + struct mlx5hws_cmd_ft_modify_attr *ft_attr); + +int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev, + struct mlx5hws_cmd_allow_other_vhca_access_attr *attr); + +int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function, + u16 vport_number, u16 *gvmi); + +#endif /* MLX5HWS_CMD_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_context.c new file mode 100644 index 000000000000..00e4fdf4a558 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_context.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. */ + +#include "mlx5hws_internal.h" + +bool mlx5hws_context_cap_dynamic_reparse(struct mlx5hws_context *ctx) +{ + return IS_BIT_SET(ctx->caps->rtc_reparse_mode, MLX5_IFC_RTC_REPARSE_BY_STC); +} + +u8 mlx5hws_context_get_reparse_mode(struct mlx5hws_context *ctx) +{ + /* Prefer to use dynamic reparse, reparse only specific actions */ + if (mlx5hws_context_cap_dynamic_reparse(ctx)) + return MLX5_IFC_RTC_REPARSE_NEVER; + + /* Otherwise use less efficient static */ + return MLX5_IFC_RTC_REPARSE_ALWAYS; +} + +static int hws_context_pools_init(struct mlx5hws_context *ctx) +{ + struct mlx5hws_pool_attr pool_attr = {0}; + u8 max_log_sz; + int ret; + int i; + + ret = mlx5hws_pat_init_pattern_cache(&ctx->pattern_cache); + if (ret) + return ret; + + ret = mlx5hws_definer_init_cache(&ctx->definer_cache); + if (ret) + goto uninit_pat_cache; + + /* Create an STC pool per FT type */ + pool_attr.pool_type = MLX5HWS_POOL_TYPE_STC; + pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STC_POOL; + max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max); + pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran); + + for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { + pool_attr.table_type = i; + ctx->stc_pool[i] = mlx5hws_pool_create(ctx, &pool_attr); + if (!ctx->stc_pool[i]) { + mlx5hws_err(ctx, "Failed to allocate STC pool [%d]", i); + ret = -ENOMEM; + goto free_stc_pools; + } + } + + return 0; + +free_stc_pools: + for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) + if (ctx->stc_pool[i]) + mlx5hws_pool_destroy(ctx->stc_pool[i]); + + mlx5hws_definer_uninit_cache(ctx->definer_cache); +uninit_pat_cache: + mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache); + return ret; +} + +static void hws_context_pools_uninit(struct mlx5hws_context *ctx) +{ + int i; + + for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { + if (ctx->stc_pool[i]) + mlx5hws_pool_destroy(ctx->stc_pool[i]); + } + + mlx5hws_definer_uninit_cache(ctx->definer_cache); + mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache); +} + +static int hws_context_init_pd(struct mlx5hws_context *ctx) +{ + int ret = 0; + + ret = mlx5_core_alloc_pd(ctx->mdev, &ctx->pd_num); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate PD\n"); + return ret; + } + + ctx->flags |= MLX5HWS_CONTEXT_FLAG_PRIVATE_PD; + + return 0; +} + +static int hws_context_uninit_pd(struct mlx5hws_context *ctx) +{ + if (ctx->flags & MLX5HWS_CONTEXT_FLAG_PRIVATE_PD) + mlx5_core_dealloc_pd(ctx->mdev, ctx->pd_num); + + return 0; +} + +static void hws_context_check_hws_supp(struct mlx5hws_context *ctx) +{ + struct mlx5hws_cmd_query_caps *caps = ctx->caps; + + /* HWS not supported on device / FW */ + if (!caps->wqe_based_update) { + 
mlx5hws_err(ctx, "Required HWS WQE based insertion cap not supported\n"); + return; + } + + if (!caps->eswitch_manager) { + mlx5hws_err(ctx, "HWS is not supported for non eswitch manager port\n"); + return; + } + + /* Current solution requires all rules to set reparse bit */ + if ((!caps->nic_ft.reparse || + (!caps->fdb_ft.reparse && caps->eswitch_manager)) || + !IS_BIT_SET(caps->rtc_reparse_mode, MLX5_IFC_RTC_REPARSE_ALWAYS)) { + mlx5hws_err(ctx, "Required HWS reparse cap not supported\n"); + return; + } + + /* FW/HW must support 8DW STE */ + if (!IS_BIT_SET(caps->ste_format, MLX5_IFC_RTC_STE_FORMAT_8DW)) { + mlx5hws_err(ctx, "Required HWS STE format not supported\n"); + return; + } + + /* Adding rules by hash and by offset are requirements */ + if (!IS_BIT_SET(caps->rtc_index_mode, MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH) || + !IS_BIT_SET(caps->rtc_index_mode, MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET)) { + mlx5hws_err(ctx, "Required HWS RTC update mode not supported\n"); + return; + } + + /* Support for SELECT definer ID is required */ + if (!IS_BIT_SET(caps->definer_format_sup, MLX5_IFC_DEFINER_FORMAT_ID_SELECT)) { + mlx5hws_err(ctx, "Required HWS Dynamic definer not supported\n"); + return; + } + + ctx->flags |= MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT; +} + +static int hws_context_init_hws(struct mlx5hws_context *ctx, + struct mlx5hws_context_attr *attr) +{ + int ret; + + hws_context_check_hws_supp(ctx); + + if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT)) + return 0; + + ret = hws_context_init_pd(ctx); + if (ret) + return ret; + + ret = hws_context_pools_init(ctx); + if (ret) + goto uninit_pd; + + if (attr->bwc) + ctx->flags |= MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; + + ret = mlx5hws_send_queues_open(ctx, attr->queues, attr->queue_size); + if (ret) + goto pools_uninit; + + INIT_LIST_HEAD(&ctx->tbl_list); + + return 0; + +pools_uninit: + hws_context_pools_uninit(ctx); +uninit_pd: + hws_context_uninit_pd(ctx); + return ret; +} + +static void hws_context_uninit_hws(struct mlx5hws_context *ctx) +{ + if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT)) + return; + + mlx5hws_send_queues_close(ctx); + hws_context_pools_uninit(ctx); + hws_context_uninit_pd(ctx); +} + +struct mlx5hws_context *mlx5hws_context_open(struct mlx5_core_dev *mdev, + struct mlx5hws_context_attr *attr) +{ + struct mlx5hws_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ctx->mdev = mdev; + + mutex_init(&ctx->ctrl_lock); + xa_init(&ctx->peer_ctx_xa); + + ctx->caps = kzalloc(sizeof(*ctx->caps), GFP_KERNEL); + if (!ctx->caps) + goto free_ctx; + + ret = mlx5hws_cmd_query_caps(mdev, ctx->caps); + if (ret) + goto free_caps; + + ret = mlx5hws_vport_init_vports(ctx); + if (ret) + goto free_caps; + + ret = hws_context_init_hws(ctx, attr); + if (ret) + goto uninit_vports; + + mlx5hws_debug_init_dump(ctx); + + return ctx; + +uninit_vports: + mlx5hws_vport_uninit_vports(ctx); +free_caps: + kfree(ctx->caps); +free_ctx: + xa_destroy(&ctx->peer_ctx_xa); + mutex_destroy(&ctx->ctrl_lock); + kfree(ctx); + return NULL; +} + +int mlx5hws_context_close(struct mlx5hws_context *ctx) +{ + mlx5hws_debug_uninit_dump(ctx); + hws_context_uninit_hws(ctx); + mlx5hws_vport_uninit_vports(ctx); + kfree(ctx->caps); + xa_destroy(&ctx->peer_ctx_xa); + mutex_destroy(&ctx->ctrl_lock); + kfree(ctx); + return 0; +} + +void mlx5hws_context_set_peer(struct mlx5hws_context *ctx, + struct mlx5hws_context *peer_ctx, + u16 peer_vhca_id) +{ + mutex_lock(&ctx->ctrl_lock); + + if (xa_err(xa_store(&ctx->peer_ctx_xa, 
peer_vhca_id, peer_ctx, GFP_KERNEL))) + pr_warn("HWS: failed storing peer vhca ID in peer xarray\n"); + + mutex_unlock(&ctx->ctrl_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_context.h new file mode 100644 index 000000000000..e5a7ce604334 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_context.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_CONTEXT_H_ +#define MLX5HWS_CONTEXT_H_ + +enum mlx5hws_context_flags { + MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT = 1 << 0, + MLX5HWS_CONTEXT_FLAG_PRIVATE_PD = 1 << 1, + MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT = 1 << 2, +}; + +enum mlx5hws_context_shared_stc_type { + MLX5HWS_CONTEXT_SHARED_STC_DECAP_L3 = 0, + MLX5HWS_CONTEXT_SHARED_STC_DOUBLE_POP = 1, + MLX5HWS_CONTEXT_SHARED_STC_MAX = 2, +}; + +struct mlx5hws_context_common_res { + struct mlx5hws_action_default_stc *default_stc; + struct mlx5hws_action_shared_stc *shared_stc[MLX5HWS_CONTEXT_SHARED_STC_MAX]; + struct mlx5hws_cmd_forward_tbl *default_miss; +}; + +struct mlx5hws_context_debug_info { + struct dentry *steering_debugfs; + struct dentry *fdb_debugfs; +}; + +struct mlx5hws_context_vports { + u16 esw_manager_gvmi; + u16 uplink_gvmi; + struct xarray vport_gvmi_xa; +}; + +struct mlx5hws_context { + struct mlx5_core_dev *mdev; + struct mlx5hws_cmd_query_caps *caps; + u32 pd_num; + struct mlx5hws_pool *stc_pool[MLX5HWS_TABLE_TYPE_MAX]; + struct mlx5hws_context_common_res common_res[MLX5HWS_TABLE_TYPE_MAX]; + struct mlx5hws_pattern_cache *pattern_cache; + struct mlx5hws_definer_cache *definer_cache; + struct mutex ctrl_lock; /* control lock to protect the whole context */ + enum mlx5hws_context_flags flags; + struct mlx5hws_send_engine *send_queue; + size_t queues; + struct mutex *bwc_send_queue_locks; /* protect BWC queues */ + struct list_head tbl_list; + struct mlx5hws_context_debug_info debug_info; + struct xarray peer_ctx_xa; + struct mlx5hws_context_vports vports; +}; + +static inline bool mlx5hws_context_bwc_supported(struct mlx5hws_context *ctx) +{ + return ctx->flags & MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; +} + +bool mlx5hws_context_cap_dynamic_reparse(struct mlx5hws_context *ctx); + +u8 mlx5hws_context_get_reparse_mode(struct mlx5hws_context *ctx); + +#endif /* MLX5HWS_CONTEXT_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_debug.c new file mode 100644 index 000000000000..2b8c5a4e1c4c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_debug.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include <linux/version.h> +#include "mlx5hws_internal.h" + +static int +hws_debug_dump_matcher_template_definer(struct seq_file *f, + void *parent_obj, + struct mlx5hws_definer *definer, + enum mlx5hws_debug_res_type type) +{ + int i; + + if (!definer) + return 0; + + seq_printf(f, "%d,0x%llx,0x%llx,%d,%d,", + type, + HWS_PTR_TO_ID(definer), + HWS_PTR_TO_ID(parent_obj), + definer->obj_id, + definer->type); + + for (i = 0; i < DW_SELECTORS; i++) + seq_printf(f, "0x%x%s", definer->dw_selector[i], + (i == DW_SELECTORS - 1) ? 
"," : "-"); + + for (i = 0; i < BYTE_SELECTORS; i++) + seq_printf(f, "0x%x%s", definer->byte_selector[i], + (i == BYTE_SELECTORS - 1) ? "," : "-"); + + for (i = 0; i < MLX5HWS_JUMBO_TAG_SZ; i++) + seq_printf(f, "%02x", definer->mask.jumbo[i]); + + seq_puts(f, "\n"); + + return 0; +} + +static int +hws_debug_dump_matcher_match_template(struct seq_file *f, struct mlx5hws_matcher *matcher) +{ + enum mlx5hws_debug_res_type type; + int i, ret; + + for (i = 0; i < matcher->num_of_mt; i++) { + struct mlx5hws_match_template *mt = &matcher->mt[i]; + + seq_printf(f, "%d,0x%llx,0x%llx,%d,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_MATCHER_MATCH_TEMPLATE, + HWS_PTR_TO_ID(mt), + HWS_PTR_TO_ID(matcher), + mt->fc_sz, + 0, 0); + + type = MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_MATCH_DEFINER; + ret = hws_debug_dump_matcher_template_definer(f, mt, mt->definer, type); + if (ret) + return ret; + } + + return 0; +} + +static int +hws_debug_dump_matcher_action_template(struct seq_file *f, struct mlx5hws_matcher *matcher) +{ + enum mlx5hws_action_type action_type; + int i, j; + + for (i = 0; i < matcher->num_of_at; i++) { + struct mlx5hws_action_template *at = &matcher->at[i]; + + seq_printf(f, "%d,0x%llx,0x%llx,%d,%d,%d", + MLX5HWS_DEBUG_RES_TYPE_MATCHER_ACTION_TEMPLATE, + HWS_PTR_TO_ID(at), + HWS_PTR_TO_ID(matcher), + at->only_term, + at->num_of_action_stes, + at->num_actions); + + for (j = 0; j < at->num_actions; j++) { + action_type = at->action_type_arr[j]; + seq_printf(f, ",%s", mlx5hws_action_type_to_str(action_type)); + } + + seq_puts(f, "\n"); + } + + return 0; +} + +static int +hws_debug_dump_matcher_attr(struct seq_file *f, struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher_attr *attr = &matcher->attr; + + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_MATCHER_ATTR, + HWS_PTR_TO_ID(matcher), + attr->priority, + attr->mode, + attr->table.sz_row_log, + attr->table.sz_col_log, + attr->optimize_using_rule_idx, + attr->optimize_flow_src, + attr->insert_mode, + attr->distribute_mode); + + return 0; +} + +static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *matcher) +{ + enum mlx5hws_table_type tbl_type = matcher->tbl->type; + struct mlx5hws_cmd_ft_query_attr ft_attr = {0}; + struct mlx5hws_pool_chunk *ste; + struct mlx5hws_pool *ste_pool; + u64 icm_addr_0 = 0; + u64 icm_addr_1 = 0; + u32 ste_0_id = -1; + u32 ste_1_id = -1; + int ret; + + seq_printf(f, "%d,0x%llx,0x%llx,%d,%d,0x%llx", + MLX5HWS_DEBUG_RES_TYPE_MATCHER, + HWS_PTR_TO_ID(matcher), + HWS_PTR_TO_ID(matcher->tbl), + matcher->num_of_mt, + matcher->end_ft_id, + matcher->col_matcher ? 
HWS_PTR_TO_ID(matcher->col_matcher) : 0); + + ste = &matcher->match_ste.ste; + ste_pool = matcher->match_ste.pool; + if (ste_pool) { + ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) + ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + } + + seq_printf(f, ",%d,%d,%d,%d", + matcher->match_ste.rtc_0_id, + (int)ste_0_id, + matcher->match_ste.rtc_1_id, + (int)ste_1_id); + + ste = &matcher->action_ste[0].ste; + ste_pool = matcher->action_ste[0].pool; + if (ste_pool) { + ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) + ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + else + ste_1_id = -1; + } else { + ste_0_id = -1; + ste_1_id = -1; + } + + ft_attr.type = matcher->tbl->fw_ft_type; + ret = mlx5hws_cmd_flow_table_query(matcher->tbl->ctx->mdev, + matcher->end_ft_id, + &ft_attr, + &icm_addr_0, + &icm_addr_1); + if (ret) + return ret; + + seq_printf(f, ",%d,%d,%d,%d,%d,0x%llx,0x%llx\n", + matcher->action_ste[0].rtc_0_id, + (int)ste_0_id, + matcher->action_ste[0].rtc_1_id, + (int)ste_1_id, + 0, + mlx5hws_debug_icm_to_idx(icm_addr_0), + mlx5hws_debug_icm_to_idx(icm_addr_1)); + + ret = hws_debug_dump_matcher_attr(f, matcher); + if (ret) + return ret; + + ret = hws_debug_dump_matcher_match_template(f, matcher); + if (ret) + return ret; + + ret = hws_debug_dump_matcher_action_template(f, matcher); + if (ret) + return ret; + + return 0; +} + +static int hws_debug_dump_table(struct seq_file *f, struct mlx5hws_table *tbl) +{ + struct mlx5hws_cmd_ft_query_attr ft_attr = {0}; + struct mlx5hws_matcher *matcher; + u64 local_icm_addr_0 = 0; + u64 local_icm_addr_1 = 0; + u64 icm_addr_0 = 0; + u64 icm_addr_1 = 0; + int ret; + + seq_printf(f, "%d,0x%llx,0x%llx,%d,%d,%d,%d,%d", + MLX5HWS_DEBUG_RES_TYPE_TABLE, + HWS_PTR_TO_ID(tbl), + HWS_PTR_TO_ID(tbl->ctx), + tbl->ft_id, + MLX5HWS_TABLE_TYPE_BASE + tbl->type, + tbl->fw_ft_type, + tbl->level, + 0); + + ft_attr.type = tbl->fw_ft_type; + ret = mlx5hws_cmd_flow_table_query(tbl->ctx->mdev, + tbl->ft_id, + &ft_attr, + &icm_addr_0, + &icm_addr_1); + if (ret) + return ret; + + seq_printf(f, ",0x%llx,0x%llx,0x%llx,0x%llx,0x%llx\n", + mlx5hws_debug_icm_to_idx(icm_addr_0), + mlx5hws_debug_icm_to_idx(icm_addr_1), + mlx5hws_debug_icm_to_idx(local_icm_addr_0), + mlx5hws_debug_icm_to_idx(local_icm_addr_1), + HWS_PTR_TO_ID(tbl->default_miss.miss_tbl)); + + list_for_each_entry(matcher, &tbl->matchers_list, list_node) { + ret = hws_debug_dump_matcher(f, matcher); + if (ret) + return ret; + } + + return 0; +} + +static int +hws_debug_dump_context_send_engine(struct seq_file *f, struct mlx5hws_context *ctx) +{ + struct mlx5hws_send_engine *send_queue; + struct mlx5hws_send_ring *send_ring; + struct mlx5hws_send_ring_cq *cq; + struct mlx5hws_send_ring_sq *sq; + int i; + + for (i = 0; i < (int)ctx->queues; i++) { + send_queue = &ctx->send_queue[i]; + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_SEND_ENGINE, + HWS_PTR_TO_ID(ctx), + i, + send_queue->used_entries, + send_queue->num_entries, + 1, /* one send ring per queue */ + send_queue->num_entries, + send_queue->err, + send_queue->completed.ci, + send_queue->completed.pi, + send_queue->completed.mask); + + send_ring = &send_queue->send_ring; + cq = &send_ring->send_cq; + sq = &send_ring->send_sq; + + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_SEND_RING, + HWS_PTR_TO_ID(ctx), + 0, /* one send ring 
per send queue */ + i, + cq->mcq.cqn, + 0, + 0, + 0, + 0, + 0, + 0, + cq->mcq.cqe_sz, + sq->sqn, + 0, + 0, + 0); + } + + return 0; +} + +static int hws_debug_dump_context_caps(struct seq_file *f, struct mlx5hws_context *ctx) +{ + struct mlx5hws_cmd_query_caps *caps = ctx->caps; + + seq_printf(f, "%d,0x%llx,%s,%d,%d,%d,%d,", + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_CAPS, + HWS_PTR_TO_ID(ctx), + caps->fw_ver, + caps->wqe_based_update, + caps->ste_format, + caps->ste_alloc_log_max, + caps->log_header_modify_argument_max_alloc); + + seq_printf(f, "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%s\n", + caps->flex_protocols, + caps->rtc_reparse_mode, + caps->rtc_index_mode, + caps->ste_alloc_log_gran, + caps->stc_alloc_log_max, + caps->stc_alloc_log_gran, + caps->rtc_log_depth_max, + caps->format_select_gtpu_dw_0, + caps->format_select_gtpu_dw_1, + caps->format_select_gtpu_dw_2, + caps->format_select_gtpu_ext_dw_0, + caps->nic_ft.max_level, + caps->nic_ft.reparse, + caps->fdb_ft.max_level, + caps->fdb_ft.reparse, + caps->log_header_modify_argument_granularity, + caps->linear_match_definer, + "regc_3"); + + return 0; +} + +static int hws_debug_dump_context_attr(struct seq_file *f, struct mlx5hws_context *ctx) +{ + seq_printf(f, "%u,0x%llx,%d,%zu,%d,%s,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_ATTR, + HWS_PTR_TO_ID(ctx), + ctx->pd_num, + ctx->queues, + ctx->send_queue->num_entries, + "None", /* no shared gvmi */ + ctx->caps->vhca_id, + 0xffff); /* no shared gvmi */ + + return 0; +} + +static int hws_debug_dump_context_info(struct seq_file *f, struct mlx5hws_context *ctx) +{ + struct mlx5_core_dev *dev = ctx->mdev; + int ret; + + seq_printf(f, "%d,0x%llx,%d,%s,%s.KERNEL_%u_%u_%u\n", + MLX5HWS_DEBUG_RES_TYPE_CONTEXT, + HWS_PTR_TO_ID(ctx), + ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT, + pci_name(dev->pdev), + HWS_DEBUG_FORMAT_VERSION, + LINUX_VERSION_MAJOR, + LINUX_VERSION_PATCHLEVEL, + LINUX_VERSION_SUBLEVEL); + + ret = hws_debug_dump_context_attr(f, ctx); + if (ret) + return ret; + + ret = hws_debug_dump_context_caps(f, ctx); + if (ret) + return ret; + + return 0; +} + +static int hws_debug_dump_context_stc_resource(struct seq_file *f, + struct mlx5hws_context *ctx, + u32 tbl_type, + struct mlx5hws_pool_resource *resource) +{ + seq_printf(f, "%d,0x%llx,%u,%u\n", + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_STC, + HWS_PTR_TO_ID(ctx), + tbl_type, + resource->base_id); + + return 0; +} + +static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context *ctx) +{ + struct mlx5hws_pool *stc_pool; + u32 table_type; + int ret; + int i; + + for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { + stc_pool = ctx->stc_pool[i]; + table_type = MLX5HWS_TABLE_TYPE_BASE + i; + + if (!stc_pool) + continue; + + if (stc_pool->resource[0]) { + ret = hws_debug_dump_context_stc_resource(f, ctx, table_type, + stc_pool->resource[0]); + if (ret) + return ret; + } + + if (i == MLX5HWS_TABLE_TYPE_FDB && stc_pool->mirror_resource[0]) { + ret = hws_debug_dump_context_stc_resource(f, ctx, table_type, + stc_pool->mirror_resource[0]); + if (ret) + return ret; + } + } + + return 0; +} + +static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ctx) +{ + struct mlx5hws_table *tbl; + int ret; + + ret = hws_debug_dump_context_info(f, ctx); + if (ret) + return ret; + + ret = hws_debug_dump_context_send_engine(f, ctx); + if (ret) + return ret; + + ret = hws_debug_dump_context_stc(f, ctx); + if (ret) + return ret; + + list_for_each_entry(tbl, &ctx->tbl_list, tbl_list_node) { + ret = hws_debug_dump_table(f, tbl); + if 
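
For orientation when reading these dump helpers: every record they emit is one CSV line whose first column is a numeric type from enum mlx5hws_debug_res_type, so a consumer can dispatch on that column alone. A hypothetical output fragment (all values made up, columns truncated):

4000,0x3b9ac9f0,1,0000:08:00.0,1.0.KERNEL_6_11_0
4001,0x3b9ac9f0,24,16,...
4100,0x3b9aca58,0x3b9ac9f0,...
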
(ret) + return ret; + } + + return 0; +} + +static int +hws_debug_dump(struct seq_file *f, struct mlx5hws_context *ctx) +{ + int ret; + + if (!f || !ctx) + return -EINVAL; + + mutex_lock(&ctx->ctrl_lock); + ret = hws_debug_dump_context(f, ctx); + mutex_unlock(&ctx->ctrl_lock); + + return ret; +} + +static int hws_dump_show(struct seq_file *file, void *priv) +{ + return hws_debug_dump(file, file->private); +} +DEFINE_SHOW_ATTRIBUTE(hws_dump); + +void mlx5hws_debug_init_dump(struct mlx5hws_context *ctx) +{ + struct mlx5_core_dev *dev = ctx->mdev; + char file_name[128]; + + ctx->debug_info.steering_debugfs = + debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev)); + ctx->debug_info.fdb_debugfs = + debugfs_create_dir("fdb", ctx->debug_info.steering_debugfs); + + sprintf(file_name, "ctx_%p", ctx); + debugfs_create_file(file_name, 0444, ctx->debug_info.fdb_debugfs, + ctx, &hws_dump_fops); +} + +void mlx5hws_debug_uninit_dump(struct mlx5hws_context *ctx) +{ + debugfs_remove_recursive(ctx->debug_info.steering_debugfs); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_debug.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_debug.h new file mode 100644 index 000000000000..b93a536035d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_debug.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_DEBUG_H_ +#define MLX5HWS_DEBUG_H_ + +#define HWS_DEBUG_FORMAT_VERSION "1.0" + +#define HWS_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL) + +enum mlx5hws_debug_res_type { + MLX5HWS_DEBUG_RES_TYPE_CONTEXT = 4000, + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_ATTR = 4001, + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_CAPS = 4002, + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_SEND_ENGINE = 4003, + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_SEND_RING = 4004, + MLX5HWS_DEBUG_RES_TYPE_CONTEXT_STC = 4005, + + MLX5HWS_DEBUG_RES_TYPE_TABLE = 4100, + + MLX5HWS_DEBUG_RES_TYPE_MATCHER = 4200, + MLX5HWS_DEBUG_RES_TYPE_MATCHER_ATTR = 4201, + MLX5HWS_DEBUG_RES_TYPE_MATCHER_MATCH_TEMPLATE = 4202, + MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_MATCH_DEFINER = 4203, + MLX5HWS_DEBUG_RES_TYPE_MATCHER_ACTION_TEMPLATE = 4204, + MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_HASH_DEFINER = 4205, + MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_RANGE_DEFINER = 4206, + MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_COMPARE_MATCH_DEFINER = 4207, +}; + +static inline u64 +mlx5hws_debug_icm_to_idx(u64 icm_addr) +{ + return (icm_addr >> 6) & 0xffffffff; +} + +void mlx5hws_debug_init_dump(struct mlx5hws_context *ctx); +void mlx5hws_debug_uninit_dump(struct mlx5hws_context *ctx); + +#endif /* MLX5HWS_DEBUG_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c new file mode 100644 index 000000000000..3bdb5c90efff --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.c @@ -0,0 +1,2146 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" + +/* Pattern tunnel Layer bits. */ +#define MLX5_FLOW_LAYER_VXLAN BIT(12) +#define MLX5_FLOW_LAYER_VXLAN_GPE BIT(13) +#define MLX5_FLOW_LAYER_GRE BIT(14) +#define MLX5_FLOW_LAYER_MPLS BIT(15) + +/* Pattern tunnel Layer bits (continued). 
*/ +#define MLX5_FLOW_LAYER_IPIP BIT(23) +#define MLX5_FLOW_LAYER_IPV6_ENCAP BIT(24) +#define MLX5_FLOW_LAYER_NVGRE BIT(25) +#define MLX5_FLOW_LAYER_GENEVE BIT(26) + +#define MLX5_FLOW_ITEM_FLEX_TUNNEL BIT_ULL(39) + +/* Tunnel Masks. */ +#define MLX5_FLOW_LAYER_TUNNEL \ + (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \ + MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_NVGRE | MLX5_FLOW_LAYER_MPLS | \ + MLX5_FLOW_LAYER_IPIP | MLX5_FLOW_LAYER_IPV6_ENCAP | \ + MLX5_FLOW_LAYER_GENEVE | MLX5_FLOW_LAYER_GTP | \ + MLX5_FLOW_ITEM_FLEX_TUNNEL) + +#define GTP_PDU_SC 0x85 +#define BAD_PORT 0xBAD +#define ETH_TYPE_IPV4_VXLAN 0x0800 +#define ETH_TYPE_IPV6_VXLAN 0x86DD +#define UDP_GTPU_PORT 2152 +#define UDP_PORT_MPLS 6635 +#define UDP_GENEVE_PORT 6081 +#define UDP_ROCEV2_PORT 4791 +#define HWS_FLOW_LAYER_TUNNEL_NO_MPLS (MLX5_FLOW_LAYER_TUNNEL & ~MLX5_FLOW_LAYER_MPLS) + +#define STE_NO_VLAN 0x0 +#define STE_SVLAN 0x1 +#define STE_CVLAN 0x2 +#define STE_NO_L3 0x0 +#define STE_IPV4 0x1 +#define STE_IPV6 0x2 +#define STE_NO_L4 0x0 +#define STE_TCP 0x1 +#define STE_UDP 0x2 +#define STE_ICMP 0x3 +#define STE_ESP 0x3 + +#define IPV4 0x4 +#define IPV6 0x6 + +/* Setter function based on bit offset and mask, for 32bit DW */ +#define _HWS_SET32(p, v, byte_off, bit_off, mask) \ + do { \ + u32 _v = v; \ + *((__be32 *)(p) + ((byte_off) / 4)) = \ + cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + \ + ((byte_off) / 4))) & \ + (~((mask) << (bit_off)))) | \ + (((_v) & (mask)) << \ + (bit_off))); \ + } while (0) + +/* Setter function based on bit offset and mask, for unaligned 32bit DW */ +#define HWS_SET32(p, v, byte_off, bit_off, mask) \ + do { \ + if (unlikely((bit_off) < 0)) { \ + u32 _bit_off = -1 * (bit_off); \ + u32 second_dw_mask = (mask) & ((1 << _bit_off) - 1); \ + _HWS_SET32(p, (v) >> _bit_off, byte_off, 0, (mask) >> _bit_off); \ + _HWS_SET32(p, (v) & second_dw_mask, (byte_off) + DW_SIZE, \ + (bit_off) % BITS_IN_DW, second_dw_mask); \ + } else { \ + _HWS_SET32(p, v, byte_off, (bit_off), (mask)); \ + } \ + } while (0) + +/* Getter for up to aligned 32bit DW */ +#define HWS_GET32(p, byte_off, bit_off, mask) \ + ((be32_to_cpu(*((__be32 *)(p) + ((byte_off) / 4))) >> (bit_off)) & (mask)) + +#define HWS_CALC_FNAME(field, inner) \ + ((inner) ? MLX5HWS_DEFINER_FNAME_##field##_I : \ + MLX5HWS_DEFINER_FNAME_##field##_O) + +#define HWS_GET_MATCH_PARAM(match_param, hdr) \ + MLX5_GET(fte_match_param, match_param, hdr) + +#define HWS_IS_FLD_SET(match_param, hdr) \ + (!!(HWS_GET_MATCH_PARAM(match_param, hdr))) + +#define HWS_IS_FLD_SET_DW_ARR(match_param, hdr, sz_in_bits) ({ \ + BUILD_BUG_ON((sz_in_bits) % 32); \ + u32 sz = sz_in_bits; \ + u32 res = 0; \ + u32 dw_off = __mlx5_dw_off(fte_match_param, hdr); \ + while (!res && sz >= 32) { \ + res = *((match_param) + (dw_off++)); \ + sz -= 32; \ + } \ + res; \ + }) + +#define HWS_IS_FLD_SET_SZ(match_param, hdr, sz_in_bits) \ + (((sz_in_bits) > 32) ? 
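
The negative-bit_off branch of HWS_SET32() above is the subtle one: it handles a field that straddles a DW boundary by splitting the write in two. A worked instance, assuming a 6-bit field with mask 0x3f and bit_off == -2 (i.e. the low two bits spill into the following DW):

/*
 * _bit_off       = 2
 * second_dw_mask = 0x3f & ((1 << 2) - 1) = 0x3
 *
 * first DW  (at byte_off):           written with (v >> 2) under mask 0xf
 * second DW (at byte_off + DW_SIZE): written with (v & 0x3) under mask 0x3
 */
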
HWS_IS_FLD_SET_DW_ARR(match_param, hdr, sz_in_bits) : \ + !!(HWS_GET_MATCH_PARAM(match_param, hdr))) + +#define HWS_GET64_MATCH_PARAM(match_param, hdr) \ + MLX5_GET64(fte_match_param, match_param, hdr) + +#define HWS_IS_FLD64_SET(match_param, hdr) \ + (!!(HWS_GET64_MATCH_PARAM(match_param, hdr))) + +#define HWS_CALC_HDR_SRC(fc, s_hdr) \ + do { \ + (fc)->s_bit_mask = __mlx5_mask(fte_match_param, s_hdr); \ + (fc)->s_bit_off = __mlx5_dw_bit_off(fte_match_param, s_hdr); \ + (fc)->s_byte_off = MLX5_BYTE_OFF(fte_match_param, s_hdr); \ + } while (0) + +#define HWS_CALC_HDR_DST(fc, d_hdr) \ + do { \ + (fc)->bit_mask = __mlx5_mask(definer_hl, d_hdr); \ + (fc)->bit_off = __mlx5_dw_bit_off(definer_hl, d_hdr); \ + (fc)->byte_off = MLX5_BYTE_OFF(definer_hl, d_hdr); \ + } while (0) + +#define HWS_CALC_HDR(fc, s_hdr, d_hdr) \ + do { \ + HWS_CALC_HDR_SRC(fc, s_hdr); \ + HWS_CALC_HDR_DST(fc, d_hdr); \ + (fc)->tag_set = &hws_definer_generic_set; \ + } while (0) + +#define HWS_SET_HDR(fc_arr, match_param, fname, s_hdr, d_hdr) \ + do { \ + if (HWS_IS_FLD_SET(match_param, s_hdr)) \ + HWS_CALC_HDR(&(fc_arr)[MLX5HWS_DEFINER_FNAME_##fname], s_hdr, d_hdr); \ + } while (0) + +struct mlx5hws_definer_sel_ctrl { + u8 allowed_full_dw; /* Full DW selectors cover all offsets */ + u8 allowed_lim_dw; /* Limited DW selectors cover offset < 64 */ + u8 allowed_bytes; /* Bytes selectors, up to offset 255 */ + u8 used_full_dw; + u8 used_lim_dw; + u8 used_bytes; + u8 full_dw_selector[DW_SELECTORS]; + u8 lim_dw_selector[DW_SELECTORS_LIMITED]; + u8 byte_selector[BYTE_SELECTORS]; +}; + +struct mlx5hws_definer_conv_data { + struct mlx5hws_context *ctx; + struct mlx5hws_definer_fc *fc; + /* enum mlx5hws_definer_match_flag */ + u32 match_flags; +}; + +static void +hws_definer_ones_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + HWS_SET32(tag, -1, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_generic_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + /* Can be optimized */ + u32 val = HWS_GET32(match_param, fc->s_byte_off, fc->s_bit_off, fc->s_bit_mask); + + HWS_SET32(tag, val, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_outer_vlan_type_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + if (HWS_GET_MATCH_PARAM(match_param, outer_headers.cvlan_tag)) + HWS_SET32(tag, STE_CVLAN, fc->byte_off, fc->bit_off, fc->bit_mask); + else if (HWS_GET_MATCH_PARAM(match_param, outer_headers.svlan_tag)) + HWS_SET32(tag, STE_SVLAN, fc->byte_off, fc->bit_off, fc->bit_mask); + else + HWS_SET32(tag, STE_NO_VLAN, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_inner_vlan_type_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + if (HWS_GET_MATCH_PARAM(match_param, inner_headers.cvlan_tag)) + HWS_SET32(tag, STE_CVLAN, fc->byte_off, fc->bit_off, fc->bit_mask); + else if (HWS_GET_MATCH_PARAM(match_param, inner_headers.svlan_tag)) + HWS_SET32(tag, STE_SVLAN, fc->byte_off, fc->bit_off, fc->bit_mask); + else + HWS_SET32(tag, STE_NO_VLAN, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_second_vlan_type_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag, + bool inner) +{ + u32 second_cvlan_tag = inner ? + HWS_GET_MATCH_PARAM(match_param, misc_parameters.inner_second_cvlan_tag) : + HWS_GET_MATCH_PARAM(match_param, misc_parameters.outer_second_cvlan_tag); + u32 second_svlan_tag = inner ? 
+ HWS_GET_MATCH_PARAM(match_param, misc_parameters.inner_second_svlan_tag) : + HWS_GET_MATCH_PARAM(match_param, misc_parameters.outer_second_svlan_tag); + + if (second_cvlan_tag) + HWS_SET32(tag, STE_CVLAN, fc->byte_off, fc->bit_off, fc->bit_mask); + else if (second_svlan_tag) + HWS_SET32(tag, STE_SVLAN, fc->byte_off, fc->bit_off, fc->bit_mask); + else + HWS_SET32(tag, STE_NO_VLAN, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_inner_second_vlan_type_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + hws_definer_second_vlan_type_set(fc, match_param, tag, true); +} + +static void +hws_definer_outer_second_vlan_type_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + hws_definer_second_vlan_type_set(fc, match_param, tag, false); +} + +static void hws_definer_icmp_dw1_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + u32 code = HWS_GET_MATCH_PARAM(match_param, misc_parameters_3.icmp_code); + u32 type = HWS_GET_MATCH_PARAM(match_param, misc_parameters_3.icmp_type); + u32 dw = (type << __mlx5_dw_bit_off(header_icmp, type)) | + (code << __mlx5_dw_bit_off(header_icmp, code)); + + HWS_SET32(tag, dw, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_icmpv6_dw1_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + u32 code = HWS_GET_MATCH_PARAM(match_param, misc_parameters_3.icmpv6_code); + u32 type = HWS_GET_MATCH_PARAM(match_param, misc_parameters_3.icmpv6_type); + u32 dw = (type << __mlx5_dw_bit_off(header_icmp, type)) | + (code << __mlx5_dw_bit_off(header_icmp, code)); + + HWS_SET32(tag, dw, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_l3_type_set(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + u32 val = HWS_GET32(match_param, fc->s_byte_off, fc->s_bit_off, fc->s_bit_mask); + + if (val == IPV4) + HWS_SET32(tag, STE_IPV4, fc->byte_off, fc->bit_off, fc->bit_mask); + else if (val == IPV6) + HWS_SET32(tag, STE_IPV6, fc->byte_off, fc->bit_off, fc->bit_mask); + else + HWS_SET32(tag, STE_NO_L3, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_set_source_port_gvmi(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag, + struct mlx5hws_context *peer_ctx) +{ + u16 source_port = HWS_GET_MATCH_PARAM(match_param, misc_parameters.source_port); + u16 vport_gvmi = 0; + int ret; + + ret = mlx5hws_vport_get_gvmi(peer_ctx, source_port, &vport_gvmi); + if (ret) { + HWS_SET32(tag, BAD_PORT, fc->byte_off, fc->bit_off, fc->bit_mask); + mlx5hws_err(fc->ctx, "Vport 0x%x is disabled or invalid\n", source_port); + return; + } + + if (vport_gvmi) + HWS_SET32(tag, vport_gvmi, fc->byte_off, fc->bit_off, fc->bit_mask); +} + +static void +hws_definer_set_source_gvmi_vhca_id(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +__must_hold(&fc->ctx->ctrl_lock) +{ + int id = HWS_GET_MATCH_PARAM(match_param, misc_parameters.source_eswitch_owner_vhca_id); + struct mlx5hws_context *peer_ctx; + + if (id == fc->ctx->caps->vhca_id) + peer_ctx = fc->ctx; + else + peer_ctx = xa_load(&fc->ctx->peer_ctx_xa, id); + + if (!peer_ctx) { + HWS_SET32(tag, BAD_PORT, fc->byte_off, fc->bit_off, fc->bit_mask); + mlx5hws_err(fc->ctx, "Invalid vhca_id provided 0x%x\n", id); + return; + } + + hws_definer_set_source_port_gvmi(fc, match_param, tag, peer_ctx); +} + +static void +hws_definer_set_source_gvmi(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag) +{ + hws_definer_set_source_port_gvmi(fc, match_param, tag, fc->ctx); 
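
The xa_load() in hws_definer_set_source_gvmi_vhca_id() above only finds something if the peer context was registered up front; pairing is symmetric and keyed by each side's vhca_id. A hypothetical pairing sketch using mlx5hws_context_set_peer() from mlx5hws_context.c (ctx0/ctx1 stand for two opened contexts on peered devices):

mlx5hws_context_set_peer(ctx0, ctx1, ctx1->caps->vhca_id);
mlx5hws_context_set_peer(ctx1, ctx0, ctx0->caps->vhca_id);
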
+} + +static struct mlx5hws_definer_fc * +hws_definer_flex_parser_steering_ok_bits_handler(struct mlx5hws_definer_conv_data *cd, + u8 parser_id) +{ + struct mlx5hws_definer_fc *fc; + + switch (parser_id) { + case 0: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser0_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + case 1: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser1_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + case 2: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser2_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + case 3: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser3_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + case 4: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser4_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + case 5: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser5_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + case 6: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser6_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + case 7: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK]; + HWS_CALC_HDR_DST(fc, oks1.flex_parser7_steering_ok); + fc->tag_set = &hws_definer_generic_set; + break; + default: + mlx5hws_err(cd->ctx, "Unsupported flex parser steering ok index %u\n", parser_id); + return NULL; + } + + return fc; +} + +static struct mlx5hws_definer_fc * +hws_definer_flex_parser_handler(struct mlx5hws_definer_conv_data *cd, + u8 parser_id) +{ + struct mlx5hws_definer_fc *fc; + + switch (parser_id) { + case 0: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_0); + fc->tag_set = &hws_definer_generic_set; + break; + case 1: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_1); + fc->tag_set = &hws_definer_generic_set; + break; + case 2: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_2); + fc->tag_set = &hws_definer_generic_set; + break; + case 3: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_3); + fc->tag_set = &hws_definer_generic_set; + break; + case 4: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_4); + fc->tag_set = &hws_definer_generic_set; + break; + case 5: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_5); + fc->tag_set = &hws_definer_generic_set; + break; + case 6: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_6); + fc->tag_set = &hws_definer_generic_set; + break; + case 7: + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7]; + HWS_CALC_HDR_DST(fc, flex_parser.flex_parser_7); + fc->tag_set = &hws_definer_generic_set; + break; + default: + mlx5hws_err(cd->ctx, "Unsupported flex parser %u\n", parser_id); + return NULL; + } + + return fc; +} + +static struct mlx5hws_definer_fc * +hws_definer_misc4_fields_handler(struct mlx5hws_definer_conv_data *cd, + bool *parser_is_used, + u32 id, + u32 value) +{ + if (id || value) { + if (id >= HWS_NUM_OF_FLEX_PARSERS) { + 
mlx5hws_err(cd->ctx, "Unsupported parser id\n"); + return NULL; + } + + if (parser_is_used[id]) { + mlx5hws_err(cd->ctx, "Parser id have been used\n"); + return NULL; + } + } + + parser_is_used[id] = true; + + return hws_definer_flex_parser_handler(cd, id); +} + +static int +hws_definer_check_match_flags(struct mlx5hws_definer_conv_data *cd) +{ + u32 flags; + + flags = cd->match_flags & (MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN_GPE | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_HEADER_0_1); + if (flags & (flags - 1)) + goto err_conflict; + + flags = cd->match_flags & (MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE_OPT_KEY | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_HEADER_2); + + if (flags & (flags - 1)) + goto err_conflict; + + flags = cd->match_flags & (MLX5HWS_DEFINER_MATCH_FLAG_TNL_MPLS_OVER_GRE | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_MPLS_OVER_UDP); + if (flags & (flags - 1)) + goto err_conflict; + + flags = cd->match_flags & (MLX5HWS_DEFINER_MATCH_FLAG_ICMPV4 | + MLX5HWS_DEFINER_MATCH_FLAG_ICMPV6 | + MLX5HWS_DEFINER_MATCH_FLAG_TCP_O | + MLX5HWS_DEFINER_MATCH_FLAG_TCP_I); + if (flags & (flags - 1)) + goto err_conflict; + + return 0; + +err_conflict: + mlx5hws_err(cd->ctx, "Invalid definer fields combination\n"); + return -EINVAL; +} + +static int +hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, + u32 *match_param) +{ + bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + struct mlx5hws_definer_fc *fc = cd->fc; + struct mlx5hws_definer_fc *curr_fc; + u32 *s_ipv6, *d_ipv6; + + if (HWS_IS_FLD_SET_SZ(match_param, outer_headers.l4_type, 0x2) || + HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c2, 0xe) || + HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c4, 0x4)) { + mlx5hws_err(cd->ctx, "Unsupported outer parameters set\n"); + return -EINVAL; + } + + /* L2 Check ethertype */ + HWS_SET_HDR(fc, match_param, ETH_TYPE_O, + outer_headers.ethertype, + eth_l2_outer.l3_ethertype); + /* L2 Check SMAC 47_16 */ + HWS_SET_HDR(fc, match_param, ETH_SMAC_47_16_O, + outer_headers.smac_47_16, eth_l2_src_outer.smac_47_16); + /* L2 Check SMAC 15_0 */ + HWS_SET_HDR(fc, match_param, ETH_SMAC_15_0_O, + outer_headers.smac_15_0, eth_l2_src_outer.smac_15_0); + /* L2 Check DMAC 47_16 */ + HWS_SET_HDR(fc, match_param, ETH_DMAC_47_16_O, + outer_headers.dmac_47_16, eth_l2_outer.dmac_47_16); + /* L2 Check DMAC 15_0 */ + HWS_SET_HDR(fc, match_param, ETH_DMAC_15_0_O, + outer_headers.dmac_15_0, eth_l2_outer.dmac_15_0); + + /* L2 VLAN */ + HWS_SET_HDR(fc, match_param, VLAN_FIRST_PRIO_O, + outer_headers.first_prio, eth_l2_outer.first_priority); + HWS_SET_HDR(fc, match_param, VLAN_CFI_O, + outer_headers.first_cfi, eth_l2_outer.first_cfi); + HWS_SET_HDR(fc, match_param, VLAN_ID_O, + outer_headers.first_vid, eth_l2_outer.first_vlan_id); + + /* L2 CVLAN and SVLAN */ + if (HWS_GET_MATCH_PARAM(match_param, outer_headers.cvlan_tag) || + HWS_GET_MATCH_PARAM(match_param, outer_headers.svlan_tag)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VLAN_TYPE_O]; + HWS_CALC_HDR_DST(curr_fc, eth_l2_outer.first_vlan_qualifier); + curr_fc->tag_set = &hws_definer_outer_vlan_type_set; + curr_fc->tag_mask_set = &hws_definer_ones_set; + } + + /* L3 Check IP header */ + HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, + outer_headers.ip_protocol, + eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_TTL_O, + outer_headers.ttl_hoplimit, + eth_l3_outer.time_to_live_hop_limit); + + /* L3 Check 
IPv4/IPv6 addresses */ + s_ipv6 = MLX5_ADDR_OF(fte_match_param, match_param, + outer_headers.src_ipv4_src_ipv6.ipv6_layout); + d_ipv6 = MLX5_ADDR_OF(fte_match_param, match_param, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout); + + /* Assume IPv6 is used if ipv6 bits are set */ + is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; + is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + + if (is_s_ipv6) { + /* Handle IPv6 source address */ + HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, + ipv6_src_outer.ipv6_address_127_96); + HWS_SET_HDR(fc, match_param, IPV6_SRC_95_64_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64, + ipv6_src_outer.ipv6_address_95_64); + HWS_SET_HDR(fc, match_param, IPV6_SRC_63_32_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32, + ipv6_src_outer.ipv6_address_63_32); + HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv6_src_outer.ipv6_address_31_0); + } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_outer.source_address); + } + if (is_d_ipv6) { + /* Handle IPv6 destination address */ + HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_O, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, + ipv6_dst_outer.ipv6_address_127_96); + HWS_SET_HDR(fc, match_param, IPV6_DST_95_64_O, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64, + ipv6_dst_outer.ipv6_address_95_64); + HWS_SET_HDR(fc, match_param, IPV6_DST_63_32_O, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32, + ipv6_dst_outer.ipv6_address_63_32); + HWS_SET_HDR(fc, match_param, IPV6_DST_31_0_O, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv6_dst_outer.ipv6_address_31_0); + } else { + /* Handle IPv4 destination address */ + HWS_SET_HDR(fc, match_param, IPV4_DST_O, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_outer.destination_address); + } + + /* L4 Handle TCP/UDP */ + HWS_SET_HDR(fc, match_param, L4_SPORT_O, + outer_headers.tcp_sport, eth_l4_outer.source_port); + HWS_SET_HDR(fc, match_param, L4_DPORT_O, + outer_headers.tcp_dport, eth_l4_outer.destination_port); + HWS_SET_HDR(fc, match_param, L4_SPORT_O, + outer_headers.udp_sport, eth_l4_outer.source_port); + HWS_SET_HDR(fc, match_param, L4_DPORT_O, + outer_headers.udp_dport, eth_l4_outer.destination_port); + HWS_SET_HDR(fc, match_param, TCP_FLAGS_O, + outer_headers.tcp_flags, eth_l4_outer.tcp_flags); + + /* L3 Handle DSCP, ECN and IHL */ + HWS_SET_HDR(fc, match_param, IP_DSCP_O, + outer_headers.ip_dscp, eth_l3_outer.dscp); + HWS_SET_HDR(fc, match_param, IP_ECN_O, + outer_headers.ip_ecn, eth_l3_outer.ecn); + HWS_SET_HDR(fc, match_param, IPV4_IHL_O, + outer_headers.ipv4_ihl, eth_l3_outer.ihl); + + /* Set IP fragmented bit */ + if (HWS_IS_FLD_SET(match_param, outer_headers.frag)) { + smac_set = HWS_IS_FLD_SET(match_param, outer_headers.smac_15_0) || + HWS_IS_FLD_SET(match_param, outer_headers.smac_47_16); + dmac_set = HWS_IS_FLD_SET(match_param, outer_headers.dmac_15_0) || + HWS_IS_FLD_SET(match_param, outer_headers.dmac_47_16); + if (smac_set == dmac_set) { + HWS_SET_HDR(fc, match_param, IP_FRAG_O, + outer_headers.frag, eth_l4_outer.ip_fragmented); + } else { + HWS_SET_HDR(fc, match_param, IP_FRAG_O, + outer_headers.frag, eth_l2_src_outer.ip_fragmented); + } + } + + /* L3_type set */ + if 
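
A note on the address handling above: in the fte_match_param layout the IPv4 address overlays the low dword of the IPv6 field, which is why both the IPv4 and the IPv6 branches read ipv6_simple_layout.ipv6_31_0. An address mask is treated as IPv6 only when one of the three upper dwords is non-zero:

/* s_ipv6 points at the 4-dword ipv6_layout: dwords 0..2 hold bits
 * 127..32, and dword 3 (ipv6_31_0) doubles as the IPv4 address.
 */
is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2];
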
(HWS_IS_FLD_SET(match_param, outer_headers.ip_version)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O]; + HWS_CALC_HDR_DST(curr_fc, eth_l2_outer.l3_type); + curr_fc->tag_set = &hws_definer_l3_type_set; + curr_fc->tag_mask_set = &hws_definer_ones_set; + HWS_CALC_HDR_SRC(curr_fc, outer_headers.ip_version); + } + + return 0; +} + +static int +hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, + u32 *match_param) +{ + bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + struct mlx5hws_definer_fc *fc = cd->fc; + struct mlx5hws_definer_fc *curr_fc; + u32 *s_ipv6, *d_ipv6; + + if (HWS_IS_FLD_SET_SZ(match_param, inner_headers.l4_type, 0x2) || + HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c2, 0xe) || + HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c4, 0x4)) { + mlx5hws_err(cd->ctx, "Unsupported inner parameters set\n"); + return -EINVAL; + } + + /* L2 Check ethertype */ + HWS_SET_HDR(fc, match_param, ETH_TYPE_I, + inner_headers.ethertype, + eth_l2_inner.l3_ethertype); + /* L2 Check SMAC 47_16 */ + HWS_SET_HDR(fc, match_param, ETH_SMAC_47_16_I, + inner_headers.smac_47_16, eth_l2_src_inner.smac_47_16); + /* L2 Check SMAC 15_0 */ + HWS_SET_HDR(fc, match_param, ETH_SMAC_15_0_I, + inner_headers.smac_15_0, eth_l2_src_inner.smac_15_0); + /* L2 Check DMAC 47_16 */ + HWS_SET_HDR(fc, match_param, ETH_DMAC_47_16_I, + inner_headers.dmac_47_16, eth_l2_inner.dmac_47_16); + /* L2 Check DMAC 15_0 */ + HWS_SET_HDR(fc, match_param, ETH_DMAC_15_0_I, + inner_headers.dmac_15_0, eth_l2_inner.dmac_15_0); + + /* L2 VLAN */ + HWS_SET_HDR(fc, match_param, VLAN_FIRST_PRIO_I, + inner_headers.first_prio, eth_l2_inner.first_priority); + HWS_SET_HDR(fc, match_param, VLAN_CFI_I, + inner_headers.first_cfi, eth_l2_inner.first_cfi); + HWS_SET_HDR(fc, match_param, VLAN_ID_I, + inner_headers.first_vid, eth_l2_inner.first_vlan_id); + + /* L2 CVLAN and SVLAN */ + if (HWS_GET_MATCH_PARAM(match_param, inner_headers.cvlan_tag) || + HWS_GET_MATCH_PARAM(match_param, inner_headers.svlan_tag)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VLAN_TYPE_I]; + HWS_CALC_HDR_DST(curr_fc, eth_l2_inner.first_vlan_qualifier); + curr_fc->tag_set = &hws_definer_inner_vlan_type_set; + curr_fc->tag_mask_set = &hws_definer_ones_set; + } + /* L3 Check IP header */ + HWS_SET_HDR(fc, match_param, IP_PROTOCOL_I, + inner_headers.ip_protocol, + eth_l3_inner.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_I, + inner_headers.ip_version, + eth_l3_inner.ip_version); + HWS_SET_HDR(fc, match_param, IP_TTL_I, + inner_headers.ttl_hoplimit, + eth_l3_inner.time_to_live_hop_limit); + + /* L3 Check IPv4/IPv6 addresses */ + s_ipv6 = MLX5_ADDR_OF(fte_match_param, match_param, + inner_headers.src_ipv4_src_ipv6.ipv6_layout); + d_ipv6 = MLX5_ADDR_OF(fte_match_param, match_param, + inner_headers.dst_ipv4_dst_ipv6.ipv6_layout); + + /* Assume IPv6 is used if ipv6 bits are set */ + is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; + is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + + if (is_s_ipv6) { + /* Handle IPv6 source address */ + HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_I, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, + ipv6_src_inner.ipv6_address_127_96); + HWS_SET_HDR(fc, match_param, IPV6_SRC_95_64_I, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64, + ipv6_src_inner.ipv6_address_95_64); + HWS_SET_HDR(fc, match_param, IPV6_SRC_63_32_I, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32, + ipv6_src_inner.ipv6_address_63_32); + HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_I, + 
inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv6_src_inner.ipv6_address_31_0); + } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_I, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_inner.source_address); + } + if (is_d_ipv6) { + /* Handle IPv6 destination address */ + HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_I, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, + ipv6_dst_inner.ipv6_address_127_96); + HWS_SET_HDR(fc, match_param, IPV6_DST_95_64_I, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64, + ipv6_dst_inner.ipv6_address_95_64); + HWS_SET_HDR(fc, match_param, IPV6_DST_63_32_I, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32, + ipv6_dst_inner.ipv6_address_63_32); + HWS_SET_HDR(fc, match_param, IPV6_DST_31_0_I, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv6_dst_inner.ipv6_address_31_0); + } else { + /* Handle IPv4 destination address */ + HWS_SET_HDR(fc, match_param, IPV4_DST_I, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_inner.destination_address); + } + + /* L4 Handle TCP/UDP */ + HWS_SET_HDR(fc, match_param, L4_SPORT_I, + inner_headers.tcp_sport, eth_l4_inner.source_port); + HWS_SET_HDR(fc, match_param, L4_DPORT_I, + inner_headers.tcp_dport, eth_l4_inner.destination_port); + HWS_SET_HDR(fc, match_param, L4_SPORT_I, + inner_headers.udp_sport, eth_l4_inner.source_port); + HWS_SET_HDR(fc, match_param, L4_DPORT_I, + inner_headers.udp_dport, eth_l4_inner.destination_port); + HWS_SET_HDR(fc, match_param, TCP_FLAGS_I, + inner_headers.tcp_flags, eth_l4_inner.tcp_flags); + + /* L3 Handle DSCP, ECN and IHL */ + HWS_SET_HDR(fc, match_param, IP_DSCP_I, + inner_headers.ip_dscp, eth_l3_inner.dscp); + HWS_SET_HDR(fc, match_param, IP_ECN_I, + inner_headers.ip_ecn, eth_l3_inner.ecn); + HWS_SET_HDR(fc, match_param, IPV4_IHL_I, + inner_headers.ipv4_ihl, eth_l3_inner.ihl); + + /* Set IP fragmented bit */ + if (HWS_IS_FLD_SET(match_param, inner_headers.frag)) { + if (HWS_IS_FLD_SET(match_param, misc_parameters.vxlan_vni)) { + HWS_SET_HDR(fc, match_param, IP_FRAG_I, + inner_headers.frag, eth_l2_inner.ip_fragmented); + } else { + smac_set = HWS_IS_FLD_SET(match_param, inner_headers.smac_15_0) || + HWS_IS_FLD_SET(match_param, inner_headers.smac_47_16); + dmac_set = HWS_IS_FLD_SET(match_param, inner_headers.dmac_15_0) || + HWS_IS_FLD_SET(match_param, inner_headers.dmac_47_16); + if (smac_set == dmac_set) { + HWS_SET_HDR(fc, match_param, IP_FRAG_I, + inner_headers.frag, eth_l4_inner.ip_fragmented); + } else { + HWS_SET_HDR(fc, match_param, IP_FRAG_I, + inner_headers.frag, eth_l2_src_inner.ip_fragmented); + } + } + } + + /* L3_type set */ + if (HWS_IS_FLD_SET(match_param, inner_headers.ip_version)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I]; + HWS_CALC_HDR_DST(curr_fc, eth_l2_inner.l3_type); + curr_fc->tag_set = &hws_definer_l3_type_set; + curr_fc->tag_mask_set = &hws_definer_ones_set; + HWS_CALC_HDR_SRC(curr_fc, inner_headers.ip_version); + } + + return 0; +} + +static int +hws_definer_conv_misc(struct mlx5hws_definer_conv_data *cd, + u32 *match_param) +{ + struct mlx5hws_cmd_query_caps *caps = cd->ctx->caps; + struct mlx5hws_definer_fc *fc = cd->fc; + struct mlx5hws_definer_fc *curr_fc; + + if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_1, 0x1) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_64, 0xc) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_d8, 0x6) 
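
These reserved_at_* guards mirror the ones at the top of hws_definer_conv_outer() and hws_definer_conv_inner(): any mask bit the definer layer cannot translate fails the conversion up front, instead of installing a matcher that silently ignores part of its mask.

/* e.g. a caller that masks on one of the fields rejected by this check
 * gets -EINVAL at definer-calculation time, not a rule that quietly
 * skips the field.
 */
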
|| + HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_e0, 0xc) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_100, 0xc) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_120, 0xa) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_140, 0x8) || + HWS_IS_FLD_SET(match_param, misc_parameters.bth_dst_qp) || + HWS_IS_FLD_SET(match_param, misc_parameters.bth_opcode) || + HWS_IS_FLD_SET(match_param, misc_parameters.inner_esp_spi) || + HWS_IS_FLD_SET(match_param, misc_parameters.outer_esp_spi) || + HWS_IS_FLD_SET(match_param, misc_parameters.source_vhca_port) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters.reserved_at_1a0, 0x60)) { + mlx5hws_err(cd->ctx, "Unsupported misc parameters set\n"); + return -EINVAL; + } + + /* Check GRE related fields */ + if (HWS_IS_FLD_SET(match_param, misc_parameters.gre_c_present)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GRE_C]; + HWS_CALC_HDR(curr_fc, + misc_parameters.gre_c_present, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_gre, gre_c_present); + curr_fc->bit_off = __mlx5_dw_bit_off(header_gre, gre_c_present); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters.gre_k_present)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GRE_K]; + HWS_CALC_HDR(curr_fc, + misc_parameters.gre_k_present, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_gre, gre_k_present); + curr_fc->bit_off = __mlx5_dw_bit_off(header_gre, gre_k_present); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters.gre_s_present)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GRE_S]; + HWS_CALC_HDR(curr_fc, + misc_parameters.gre_s_present, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_gre, gre_s_present); + curr_fc->bit_off = __mlx5_dw_bit_off(header_gre, gre_s_present); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters.gre_protocol)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL]; + HWS_CALC_HDR(curr_fc, + misc_parameters.gre_protocol, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_gre, gre_protocol); + curr_fc->bit_off = __mlx5_dw_bit_off(header_gre, gre_protocol); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters.gre_key.key)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE | + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE_OPT_KEY; + HWS_SET_HDR(fc, match_param, GRE_OPT_KEY, + misc_parameters.gre_key.key, tunnel_header.tunnel_header_2); + } + + /* Check GENEVE related fields */ + if (HWS_IS_FLD_SET(match_param, misc_parameters.geneve_vni)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GENEVE_VNI]; + HWS_CALC_HDR(curr_fc, + misc_parameters.geneve_vni, + tunnel_header.tunnel_header_1); + curr_fc->bit_mask = __mlx5_mask(header_geneve, vni); + curr_fc->bit_off = __mlx5_dw_bit_off(header_geneve, vni); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters.geneve_opt_len)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GENEVE_OPT_LEN]; + HWS_CALC_HDR(curr_fc, + misc_parameters.geneve_opt_len, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_geneve, opt_len); + curr_fc->bit_off = __mlx5_dw_bit_off(header_geneve, opt_len); + } + + if (HWS_IS_FLD_SET(match_param, 
misc_parameters.geneve_protocol_type)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GENEVE_PROTO]; + HWS_CALC_HDR(curr_fc, + misc_parameters.geneve_protocol_type, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_geneve, protocol_type); + curr_fc->bit_off = __mlx5_dw_bit_off(header_geneve, protocol_type); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters.geneve_oam)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GENEVE_OAM]; + HWS_CALC_HDR(curr_fc, + misc_parameters.geneve_oam, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_geneve, o_flag); + curr_fc->bit_off = __mlx5_dw_bit_off(header_geneve, o_flag); + } + + HWS_SET_HDR(fc, match_param, SOURCE_QP, + misc_parameters.source_sqn, source_qp_gvmi.source_qp); + HWS_SET_HDR(fc, match_param, IPV6_FLOW_LABEL_O, + misc_parameters.outer_ipv6_flow_label, eth_l3_outer.flow_label); + HWS_SET_HDR(fc, match_param, IPV6_FLOW_LABEL_I, + misc_parameters.inner_ipv6_flow_label, eth_l3_inner.flow_label); + + /* L2 Second VLAN */ + HWS_SET_HDR(fc, match_param, VLAN_SECOND_PRIO_O, + misc_parameters.outer_second_prio, eth_l2_outer.second_priority); + HWS_SET_HDR(fc, match_param, VLAN_SECOND_PRIO_I, + misc_parameters.inner_second_prio, eth_l2_inner.second_priority); + HWS_SET_HDR(fc, match_param, VLAN_SECOND_CFI_O, + misc_parameters.outer_second_cfi, eth_l2_outer.second_cfi); + HWS_SET_HDR(fc, match_param, VLAN_SECOND_CFI_I, + misc_parameters.inner_second_cfi, eth_l2_inner.second_cfi); + HWS_SET_HDR(fc, match_param, VLAN_SECOND_ID_O, + misc_parameters.outer_second_vid, eth_l2_outer.second_vlan_id); + HWS_SET_HDR(fc, match_param, VLAN_SECOND_ID_I, + misc_parameters.inner_second_vid, eth_l2_inner.second_vlan_id); + + /* L2 Second CVLAN and SVLAN */ + if (HWS_GET_MATCH_PARAM(match_param, misc_parameters.outer_second_cvlan_tag) || + HWS_GET_MATCH_PARAM(match_param, misc_parameters.outer_second_svlan_tag)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_O]; + HWS_CALC_HDR_DST(curr_fc, eth_l2_outer.second_vlan_qualifier); + curr_fc->tag_set = &hws_definer_outer_second_vlan_type_set; + curr_fc->tag_mask_set = &hws_definer_ones_set; + } + + if (HWS_GET_MATCH_PARAM(match_param, misc_parameters.inner_second_cvlan_tag) || + HWS_GET_MATCH_PARAM(match_param, misc_parameters.inner_second_svlan_tag)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_I]; + HWS_CALC_HDR_DST(curr_fc, eth_l2_inner.second_vlan_qualifier); + curr_fc->tag_set = &hws_definer_inner_second_vlan_type_set; + curr_fc->tag_mask_set = &hws_definer_ones_set; + } + + /* VXLAN VNI */ + if (HWS_GET_MATCH_PARAM(match_param, misc_parameters.vxlan_vni)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN; + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VXLAN_VNI]; + HWS_CALC_HDR(curr_fc, misc_parameters.vxlan_vni, tunnel_header.tunnel_header_1); + curr_fc->bit_mask = __mlx5_mask(header_vxlan, vni); + curr_fc->bit_off = __mlx5_dw_bit_off(header_vxlan, vni); + } + + /* Flex protocol steering ok bits */ + if (HWS_GET_MATCH_PARAM(match_param, misc_parameters.geneve_tlv_option_0_exist)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE; + + if (!caps->flex_parser_ok_bits_supp) { + mlx5hws_err(cd->ctx, "Unsupported flex_parser_ok_bits_supp capability\n"); + return -EOPNOTSUPP; + } + + curr_fc = hws_definer_flex_parser_steering_ok_bits_handler( + cd, caps->flex_parser_id_geneve_tlv_option_0); + if (!curr_fc) + return -EINVAL; + + 
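/* Record the source offset of the option-0 exist bit in the match param */ +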
HWS_CALC_HDR_SRC(fc, misc_parameters.geneve_tlv_option_0_exist); + } + + if (HWS_GET_MATCH_PARAM(match_param, misc_parameters.source_port)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_SOURCE_GVMI]; + HWS_CALC_HDR_DST(curr_fc, source_qp_gvmi.source_gvmi); + curr_fc->tag_mask_set = &hws_definer_ones_set; + curr_fc->tag_set = HWS_IS_FLD_SET(match_param, + misc_parameters.source_eswitch_owner_vhca_id) ? + &hws_definer_set_source_gvmi_vhca_id : + &hws_definer_set_source_gvmi; + } else { + if (HWS_IS_FLD_SET(match_param, misc_parameters.source_eswitch_owner_vhca_id)) { + mlx5hws_err(cd->ctx, + "Unsupported source_eswitch_owner_vhca_id field usage\n"); + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +hws_definer_conv_misc2(struct mlx5hws_definer_conv_data *cd, + u32 *match_param) +{ + struct mlx5hws_cmd_query_caps *caps = cd->ctx->caps; + struct mlx5hws_definer_fc *fc = cd->fc; + struct mlx5hws_definer_fc *curr_fc; + + if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1a0, 0x8) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1b8, 0x8) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1c0, 0x40) || + HWS_IS_FLD_SET(match_param, misc_parameters_2.macsec_syndrome) || + HWS_IS_FLD_SET(match_param, misc_parameters_2.ipsec_syndrome)) { + mlx5hws_err(cd->ctx, "Unsupported misc2 parameters set\n"); + return -EINVAL; + } + + HWS_SET_HDR(fc, match_param, MPLS0_O, + misc_parameters_2.outer_first_mpls, mpls_outer.mpls0_label); + HWS_SET_HDR(fc, match_param, MPLS0_I, + misc_parameters_2.inner_first_mpls, mpls_inner.mpls0_label); + HWS_SET_HDR(fc, match_param, REG_0, + misc_parameters_2.metadata_reg_c_0, registers.register_c_0); + HWS_SET_HDR(fc, match_param, REG_1, + misc_parameters_2.metadata_reg_c_1, registers.register_c_1); + HWS_SET_HDR(fc, match_param, REG_2, + misc_parameters_2.metadata_reg_c_2, registers.register_c_2); + HWS_SET_HDR(fc, match_param, REG_3, + misc_parameters_2.metadata_reg_c_3, registers.register_c_3); + HWS_SET_HDR(fc, match_param, REG_4, + misc_parameters_2.metadata_reg_c_4, registers.register_c_4); + HWS_SET_HDR(fc, match_param, REG_5, + misc_parameters_2.metadata_reg_c_5, registers.register_c_5); + HWS_SET_HDR(fc, match_param, REG_6, + misc_parameters_2.metadata_reg_c_6, registers.register_c_6); + HWS_SET_HDR(fc, match_param, REG_7, + misc_parameters_2.metadata_reg_c_7, registers.register_c_7); + HWS_SET_HDR(fc, match_param, REG_A, + misc_parameters_2.metadata_reg_a, metadata.general_purpose); + + if (HWS_IS_FLD_SET(match_param, misc_parameters_2.outer_first_mpls_over_gre)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_MPLS_OVER_GRE; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported misc2 first mpls over gre parameters set\n"); + return -EOPNOTSUPP; + } + + curr_fc = hws_definer_flex_parser_handler(cd, caps->flex_parser_id_mpls_over_gre); + if (!curr_fc) + return -EINVAL; + + HWS_CALC_HDR_SRC(fc, misc_parameters_2.outer_first_mpls_over_gre); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_2.outer_first_mpls_over_udp)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_MPLS_OVER_UDP; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported misc2 first mpls over udp parameters set\n"); + return -EOPNOTSUPP; + } + + curr_fc = hws_definer_flex_parser_handler(cd, caps->flex_parser_id_mpls_over_udp); + if (!curr_fc) + return -EINVAL; + + HWS_CALC_HDR_SRC(fc, 
misc_parameters_2.outer_first_mpls_over_udp); + } + + return 0; +} + +static int +hws_definer_conv_misc3(struct mlx5hws_definer_conv_data *cd, u32 *match_param) +{ + struct mlx5hws_cmd_query_caps *caps = cd->ctx->caps; + struct mlx5hws_definer_fc *fc = cd->fc; + struct mlx5hws_definer_fc *curr_fc; + bool vxlan_gpe_flex_parser_enabled; + + /* Check reserved and unsupported fields */ + if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_3.reserved_at_80, 0x8) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters_3.reserved_at_b0, 0x10) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters_3.reserved_at_170, 0x10) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters_3.reserved_at_1e0, 0x20)) { + mlx5hws_err(cd->ctx, "Unsupported misc3 parameters set\n"); + return -EINVAL; + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.inner_tcp_seq_num) || + HWS_IS_FLD_SET(match_param, misc_parameters_3.inner_tcp_ack_num)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TCP_I; + HWS_SET_HDR(fc, match_param, TCP_SEQ_NUM, + misc_parameters_3.inner_tcp_seq_num, tcp_icmp.tcp_seq); + HWS_SET_HDR(fc, match_param, TCP_ACK_NUM, + misc_parameters_3.inner_tcp_ack_num, tcp_icmp.tcp_ack); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.outer_tcp_seq_num) || + HWS_IS_FLD_SET(match_param, misc_parameters_3.outer_tcp_ack_num)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TCP_O; + HWS_SET_HDR(fc, match_param, TCP_SEQ_NUM, + misc_parameters_3.outer_tcp_seq_num, tcp_icmp.tcp_seq); + HWS_SET_HDR(fc, match_param, TCP_ACK_NUM, + misc_parameters_3.outer_tcp_ack_num, tcp_icmp.tcp_ack); + } + + vxlan_gpe_flex_parser_enabled = caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.outer_vxlan_gpe_vni)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN_GPE; + + if (!vxlan_gpe_flex_parser_enabled) { + mlx5hws_err(cd->ctx, "Unsupported VXLAN GPE flex parser\n"); + return -EOPNOTSUPP; + } + + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VXLAN_GPE_VNI]; + HWS_CALC_HDR(curr_fc, misc_parameters_3.outer_vxlan_gpe_vni, + tunnel_header.tunnel_header_1); + curr_fc->bit_mask = __mlx5_mask(header_vxlan_gpe, vni); + curr_fc->bit_off = __mlx5_dw_bit_off(header_vxlan_gpe, vni); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.outer_vxlan_gpe_next_protocol)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN_GPE; + + if (!vxlan_gpe_flex_parser_enabled) { + mlx5hws_err(cd->ctx, "Unsupported VXLAN GPE flex parser\n"); + return -EOPNOTSUPP; + } + + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VXLAN_GPE_PROTO]; + HWS_CALC_HDR(curr_fc, misc_parameters_3.outer_vxlan_gpe_next_protocol, + tunnel_header.tunnel_header_0); + curr_fc->byte_off += MLX5_BYTE_OFF(header_vxlan_gpe, protocol); + curr_fc->bit_mask = __mlx5_mask(header_vxlan_gpe, protocol); + curr_fc->bit_off = __mlx5_dw_bit_off(header_vxlan_gpe, protocol); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.outer_vxlan_gpe_flags)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN_GPE; + + if (!vxlan_gpe_flex_parser_enabled) { + mlx5hws_err(cd->ctx, "Unsupported VXLAN GPE flex parser\n"); + return -EOPNOTSUPP; + } + + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_VXLAN_GPE_FLAGS]; + HWS_CALC_HDR(curr_fc, misc_parameters_3.outer_vxlan_gpe_flags, + tunnel_header.tunnel_header_0); + curr_fc->bit_mask = __mlx5_mask(header_vxlan_gpe, flags); + curr_fc->bit_off = __mlx5_dw_bit_off(header_vxlan_gpe, flags); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.icmp_header_data) || + HWS_IS_FLD_SET(match_param, 
misc_parameters_3.icmp_type) || + HWS_IS_FLD_SET(match_param, misc_parameters_3.icmp_code)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_ICMPV4; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported ICMPv4 flex parser\n"); + return -EOPNOTSUPP; + } + + HWS_SET_HDR(fc, match_param, ICMP_DW3, + misc_parameters_3.icmp_header_data, tcp_icmp.icmp_dw3); + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.icmp_type) || + HWS_IS_FLD_SET(match_param, misc_parameters_3.icmp_code)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_ICMP_DW1]; + HWS_CALC_HDR_DST(curr_fc, tcp_icmp.icmp_dw1); + curr_fc->tag_set = &hws_definer_icmp_dw1_set; + } + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.icmpv6_header_data) || + HWS_IS_FLD_SET(match_param, misc_parameters_3.icmpv6_type) || + HWS_IS_FLD_SET(match_param, misc_parameters_3.icmpv6_code)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_ICMPV6; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported ICMPv6 parser\n"); + return -EOPNOTSUPP; + } + + HWS_SET_HDR(fc, match_param, ICMP_DW3, + misc_parameters_3.icmpv6_header_data, tcp_icmp.icmp_dw3); + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.icmpv6_type) || + HWS_IS_FLD_SET(match_param, misc_parameters_3.icmpv6_code)) { + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_ICMP_DW1]; + HWS_CALC_HDR_DST(curr_fc, tcp_icmp.icmp_dw1); + curr_fc->tag_set = &hws_definer_icmpv6_dw1_set; + } + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.geneve_tlv_option_0_data)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE; + + curr_fc = + hws_definer_flex_parser_handler(cd, + caps->flex_parser_id_geneve_tlv_option_0); + if (!curr_fc) + return -EINVAL; + + HWS_CALC_HDR_SRC(fc, misc_parameters_3.geneve_tlv_option_0_data); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.gtpu_teid)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported GTPU TEID flex parser\n"); + return -EOPNOTSUPP; + } + + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_GTP_TEID]; + fc->tag_set = &hws_definer_generic_set; + fc->bit_mask = __mlx5_mask(header_gtp, teid); + fc->byte_off = caps->format_select_gtpu_dw_1 * DW_SIZE; + HWS_CALC_HDR_SRC(fc, misc_parameters_3.gtpu_teid); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.gtpu_msg_type)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported GTPU flex parser\n"); + return -EOPNOTSUPP; + } + + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_GTP_MSG_TYPE]; + fc->tag_set = &hws_definer_generic_set; + fc->bit_mask = __mlx5_mask(header_gtp, msg_type); + fc->bit_off = __mlx5_dw_bit_off(header_gtp, msg_type); + fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE; + HWS_CALC_HDR_SRC(fc, misc_parameters_3.gtpu_msg_type); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.gtpu_msg_flags)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported GTPU flex parser\n"); + return -EOPNOTSUPP; + } + + fc = &cd->fc[MLX5HWS_DEFINER_FNAME_GTP_MSG_TYPE]; + fc->tag_set = &hws_definer_generic_set; + fc->bit_mask = __mlx5_mask(header_gtp, msg_flags); + fc->bit_off = __mlx5_dw_bit_off(header_gtp, msg_flags); + fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE; + 
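/* msg_flags has no dedicated fname; it reuses the GTP_MSG_TYPE field copy entry, rewritten above with the msg_flags mask and offset */ +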
HWS_CALC_HDR_SRC(fc, misc_parameters_3.gtpu_msg_flags); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.gtpu_dw_2)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported GTPU DW2 flex parser\n"); + return -EOPNOTSUPP; + } + + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GTPU_DW2]; + curr_fc->tag_set = &hws_definer_generic_set; + curr_fc->bit_mask = -1; + curr_fc->byte_off = caps->format_select_gtpu_dw_2 * DW_SIZE; + HWS_CALC_HDR_SRC(fc, misc_parameters_3.gtpu_dw_2); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.gtpu_first_ext_dw_0)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported GTPU first EXT DW0 flex parser\n"); + return -EOPNOTSUPP; + } + + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GTPU_FIRST_EXT_DW0]; + curr_fc->tag_set = &hws_definer_generic_set; + curr_fc->bit_mask = -1; + curr_fc->byte_off = caps->format_select_gtpu_ext_dw_0 * DW_SIZE; + HWS_CALC_HDR_SRC(fc, misc_parameters_3.gtpu_first_ext_dw_0); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_3.gtpu_dw_0)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU; + + if (!(caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED)) { + mlx5hws_err(cd->ctx, "Unsupported GTPU DW0 flex parser\n"); + return -EOPNOTSUPP; + } + + curr_fc = &fc[MLX5HWS_DEFINER_FNAME_GTPU_DW0]; + curr_fc->tag_set = &hws_definer_generic_set; + curr_fc->bit_mask = -1; + curr_fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE; + HWS_CALC_HDR_SRC(fc, misc_parameters_3.gtpu_dw_0); + } + + return 0; +} + +static int +hws_definer_conv_misc4(struct mlx5hws_definer_conv_data *cd, + u32 *match_param) +{ + bool parser_is_used[HWS_NUM_OF_FLEX_PARSERS] = {}; + struct mlx5hws_definer_fc *fc; + u32 id, value; + + if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_4.reserved_at_100, 0x100)) { + mlx5hws_err(cd->ctx, "Unsupported misc4 parameters set\n"); + return -EINVAL; + } + + id = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_id_0); + value = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_value_0); + fc = hws_definer_misc4_fields_handler(cd, parser_is_used, id, value); + if (!fc) + return -EINVAL; + + HWS_CALC_HDR_SRC(fc, misc_parameters_4.prog_sample_field_value_0); + + id = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_id_1); + value = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_value_1); + fc = hws_definer_misc4_fields_handler(cd, parser_is_used, id, value); + if (!fc) + return -EINVAL; + + HWS_CALC_HDR_SRC(fc, misc_parameters_4.prog_sample_field_value_1); + + id = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_id_2); + value = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_value_2); + fc = hws_definer_misc4_fields_handler(cd, parser_is_used, id, value); + if (!fc) + return -EINVAL; + + HWS_CALC_HDR_SRC(fc, misc_parameters_4.prog_sample_field_value_2); + + id = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_id_3); + value = HWS_GET_MATCH_PARAM(match_param, misc_parameters_4.prog_sample_field_value_3); + fc = hws_definer_misc4_fields_handler(cd, parser_is_used, id, value); + if (!fc) + return -EINVAL; + + HWS_CALC_HDR_SRC(fc, misc_parameters_4.prog_sample_field_value_3); + + return 0; +} + +static int +hws_definer_conv_misc5(struct 
mlx5hws_definer_conv_data *cd, + u32 *match_param) +{ + struct mlx5hws_definer_fc *fc = cd->fc; + + if (HWS_IS_FLD_SET(match_param, misc_parameters_5.macsec_tag_0) || + HWS_IS_FLD_SET(match_param, misc_parameters_5.macsec_tag_1) || + HWS_IS_FLD_SET(match_param, misc_parameters_5.macsec_tag_2) || + HWS_IS_FLD_SET(match_param, misc_parameters_5.macsec_tag_3) || + HWS_IS_FLD_SET_SZ(match_param, misc_parameters_5.reserved_at_100, 0x100)) { + mlx5hws_err(cd->ctx, "Unsupported misc5 parameters set\n"); + return -EINVAL; + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_5.tunnel_header_0)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_HEADER_0_1; + HWS_SET_HDR(fc, match_param, TNL_HDR_0, + misc_parameters_5.tunnel_header_0, tunnel_header.tunnel_header_0); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_5.tunnel_header_1)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_HEADER_0_1; + HWS_SET_HDR(fc, match_param, TNL_HDR_1, + misc_parameters_5.tunnel_header_1, tunnel_header.tunnel_header_1); + } + + if (HWS_IS_FLD_SET(match_param, misc_parameters_5.tunnel_header_2)) { + cd->match_flags |= MLX5HWS_DEFINER_MATCH_FLAG_TNL_HEADER_2; + HWS_SET_HDR(fc, match_param, TNL_HDR_2, + misc_parameters_5.tunnel_header_2, tunnel_header.tunnel_header_2); + } + + HWS_SET_HDR(fc, match_param, TNL_HDR_3, + misc_parameters_5.tunnel_header_3, tunnel_header.tunnel_header_3); + + return 0; +} + +static int hws_definer_get_fc_size(struct mlx5hws_definer_fc *fc) +{ + u32 fc_sz = 0; + int i; + + /* For empty matcher, ZERO_SIZE_PTR is returned */ + if (fc == ZERO_SIZE_PTR) + return 0; + + for (i = 0; i < MLX5HWS_DEFINER_FNAME_MAX; i++) + if (fc[i].tag_set) + fc_sz++; + return fc_sz; +} + +static struct mlx5hws_definer_fc * +hws_definer_alloc_compressed_fc(struct mlx5hws_definer_fc *fc) +{ + struct mlx5hws_definer_fc *compressed_fc = NULL; + u32 definer_size = hws_definer_get_fc_size(fc); + u32 fc_sz = 0; + int i; + + compressed_fc = kcalloc(definer_size, sizeof(*compressed_fc), GFP_KERNEL); + if (!compressed_fc) + return NULL; + + /* For empty matcher, ZERO_SIZE_PTR is returned */ + if (!definer_size) + return compressed_fc; + + for (i = 0, fc_sz = 0; i < MLX5HWS_DEFINER_FNAME_MAX; i++) { + if (!fc[i].tag_set) + continue; + + fc[i].fname = i; + memcpy(&compressed_fc[fc_sz++], &fc[i], sizeof(*compressed_fc)); + } + + return compressed_fc; +} + +static void +hws_definer_set_hl(u8 *hl, struct mlx5hws_definer_fc *fc) +{ + int i; + + /* nothing to do for empty matcher */ + if (fc == ZERO_SIZE_PTR) + return; + + for (i = 0; i < MLX5HWS_DEFINER_FNAME_MAX; i++) { + if (!fc[i].tag_set) + continue; + + HWS_SET32(hl, -1, fc[i].byte_off, fc[i].bit_off, fc[i].bit_mask); + } +} + +static struct mlx5hws_definer_fc * +hws_definer_alloc_fc(struct mlx5hws_context *ctx, + size_t len) +{ + struct mlx5hws_definer_fc *fc; + int i; + + fc = kcalloc(len, sizeof(*fc), GFP_KERNEL); + if (!fc) + return NULL; + + for (i = 0; i < len; i++) + fc[i].ctx = ctx; + + return fc; +} + +static int +hws_definer_conv_match_params_to_hl(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt, + u8 *hl) +{ + struct mlx5hws_definer_conv_data cd = {0}; + struct mlx5hws_definer_fc *fc; + int ret; + + fc = hws_definer_alloc_fc(ctx, MLX5HWS_DEFINER_FNAME_MAX); + if (!fc) + return -ENOMEM; + + cd.fc = fc; + cd.ctx = ctx; + + if (mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC6) { + mlx5hws_err(ctx, "Unsupported match_criteria_enable provided\n"); + ret = -EOPNOTSUPP; + goto err_free_fc; + } + + if 
(mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_OUTER) { + ret = hws_definer_conv_outer(&cd, mt->match_param); + if (ret) + goto err_free_fc; + } + + if (mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_INNER) { + ret = hws_definer_conv_inner(&cd, mt->match_param); + if (ret) + goto err_free_fc; + } + + if (mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC) { + ret = hws_definer_conv_misc(&cd, mt->match_param); + if (ret) + goto err_free_fc; + } + + if (mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2) { + ret = hws_definer_conv_misc2(&cd, mt->match_param); + if (ret) + goto err_free_fc; + } + + if (mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC3) { + ret = hws_definer_conv_misc3(&cd, mt->match_param); + if (ret) + goto err_free_fc; + } + + if (mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC4) { + ret = hws_definer_conv_misc4(&cd, mt->match_param); + if (ret) + goto err_free_fc; + } + + if (mt->match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC5) { + ret = hws_definer_conv_misc5(&cd, mt->match_param); + if (ret) + goto err_free_fc; + } + + /* Check there is no conflicted fields set together */ + ret = hws_definer_check_match_flags(&cd); + if (ret) + goto err_free_fc; + + /* Allocate fc array on mt */ + mt->fc = hws_definer_alloc_compressed_fc(fc); + if (!mt->fc) { + mlx5hws_err(ctx, + "Convert match params: failed to set field copy to match template\n"); + ret = -ENOMEM; + goto err_free_fc; + } + mt->fc_sz = hws_definer_get_fc_size(fc); + + /* Fill in headers layout */ + hws_definer_set_hl(hl, fc); + + kfree(fc); + return 0; + +err_free_fc: + kfree(fc); + return ret; +} + +struct mlx5hws_definer_fc * +mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx, + u8 match_criteria_enable, + u32 *match_param, + int *fc_sz) +{ + struct mlx5hws_definer_fc *compressed_fc = NULL; + struct mlx5hws_definer_conv_data cd = {0}; + struct mlx5hws_definer_fc *fc; + int ret; + + fc = hws_definer_alloc_fc(ctx, MLX5HWS_DEFINER_FNAME_MAX); + if (!fc) + return NULL; + + cd.fc = fc; + cd.ctx = ctx; + + if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_OUTER) { + ret = hws_definer_conv_outer(&cd, match_param); + if (ret) + goto err_free_fc; + } + + if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_INNER) { + ret = hws_definer_conv_inner(&cd, match_param); + if (ret) + goto err_free_fc; + } + + if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC) { + ret = hws_definer_conv_misc(&cd, match_param); + if (ret) + goto err_free_fc; + } + + if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2) { + ret = hws_definer_conv_misc2(&cd, match_param); + if (ret) + goto err_free_fc; + } + + if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC3) { + ret = hws_definer_conv_misc3(&cd, match_param); + if (ret) + goto err_free_fc; + } + + if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC4) { + ret = hws_definer_conv_misc4(&cd, match_param); + if (ret) + goto err_free_fc; + } + + if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC5) { + ret = hws_definer_conv_misc5(&cd, match_param); + if (ret) + goto err_free_fc; + } + + /* Allocate fc array on mt */ + compressed_fc = hws_definer_alloc_compressed_fc(fc); + if (!compressed_fc) { + mlx5hws_err(ctx, + "Convert to compressed fc: failed to set field copy to match template\n"); + goto err_free_fc; + } + *fc_sz = hws_definer_get_fc_size(fc); + +err_free_fc: + kfree(fc); + return compressed_fc; +} 
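
For orientation, a minimal hypothetical caller of the converter above could look like the sketch below. Only the helper itself, the MLX5HWS_DEFINER_MATCH_CRITERIA_* enum and struct mlx5hws_definer_fc come from this patch; example_build_compressed_fc and its debug loop are illustrative:

static int example_build_compressed_fc(struct mlx5hws_context *ctx,
				       u32 *match_param)
{
	struct mlx5hws_definer_fc *fc;
	int fc_sz = 0;
	int i;

	/* Convert an outer-headers match into a compact field copy array */
	fc = mlx5hws_definer_conv_match_params_to_compressed_fc(ctx,
			MLX5HWS_DEFINER_MATCH_CRITERIA_OUTER,
			match_param, &fc_sz);
	if (!fc)
		return -EINVAL;

	/* Each entry describes one matched field and its header layout offset
	 * (offsets are remapped to tag offsets only once a definer is bound)
	 */
	for (i = 0; i < fc_sz; i++)
		mlx5hws_dbg(ctx, "fc[%d]: fname %d, byte_off %u\n",
			    i, fc[i].fname, fc[i].byte_off);

	kfree(fc);
	return 0;
}

The helper kcallocs the compressed array, so ownership passes to the caller, hence the kfree.
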
+ +static int +hws_definer_find_byte_in_tag(struct mlx5hws_definer *definer, + u32 hl_byte_off, + u32 *tag_byte_off) +{ + int i, dw_to_scan; + u8 byte_offset; + + /* Avoid accessing unused DW selectors */ + dw_to_scan = mlx5hws_definer_is_jumbo(definer) ? + DW_SELECTORS : DW_SELECTORS_MATCH; + + /* Add offset since each DW covers multiple BYTEs */ + byte_offset = hl_byte_off % DW_SIZE; + for (i = 0; i < dw_to_scan; i++) { + if (definer->dw_selector[i] == hl_byte_off / DW_SIZE) { + *tag_byte_off = byte_offset + DW_SIZE * (DW_SELECTORS - i - 1); + return 0; + } + } + + /* Add offset to skip DWs in definer */ + byte_offset = DW_SIZE * DW_SELECTORS; + /* Iterate in reverse since the code uses bytes from 7 -> 0 */ + for (i = BYTE_SELECTORS; i-- > 0 ;) { + if (definer->byte_selector[i] == hl_byte_off) { + *tag_byte_off = byte_offset + (BYTE_SELECTORS - i - 1); + return 0; + } + } + + return -EINVAL; +} + +static int +hws_definer_fc_bind(struct mlx5hws_definer *definer, + struct mlx5hws_definer_fc *fc, + u32 fc_sz) +{ + u32 tag_offset = 0; + int ret, byte_diff; + u32 i; + + for (i = 0; i < fc_sz; i++) { + /* Map header layout byte offset to byte offset in tag */ + ret = hws_definer_find_byte_in_tag(definer, fc->byte_off, &tag_offset); + if (ret) + return ret; + + /* Move setter based on the location in the definer */ + byte_diff = fc->byte_off % DW_SIZE - tag_offset % DW_SIZE; + fc->bit_off = fc->bit_off + byte_diff * BITS_IN_BYTE; + + /* Update offset in headers layout to offset in tag */ + fc->byte_off = tag_offset; + fc++; + } + + return 0; +} + +static bool +hws_definer_best_hl_fit_recu(struct mlx5hws_definer_sel_ctrl *ctrl, + u32 cur_dw, + u32 *data) +{ + u8 bytes_set; + int byte_idx; + bool ret; + int i; + + /* Reached end, nothing left to do */ + if (cur_dw == MLX5_ST_SZ_DW(definer_hl)) + return true; + + /* No data set, can skip to next DW */ + while (!*data) { + cur_dw++; + data++; + + /* Reached end, nothing left to do */ + if (cur_dw == MLX5_ST_SZ_DW(definer_hl)) + return true; + } + + /* Used all DW selectors and Byte selectors, no possible solution */ + if (ctrl->allowed_full_dw == ctrl->used_full_dw && + ctrl->allowed_lim_dw == ctrl->used_lim_dw && + ctrl->allowed_bytes == ctrl->used_bytes) + return false; + + /* Try to use limited DW selectors */ + if (ctrl->allowed_lim_dw > ctrl->used_lim_dw && cur_dw < 64) { + ctrl->lim_dw_selector[ctrl->used_lim_dw++] = cur_dw; + + ret = hws_definer_best_hl_fit_recu(ctrl, cur_dw + 1, data + 1); + if (ret) + return ret; + + ctrl->lim_dw_selector[--ctrl->used_lim_dw] = 0; + } + + /* Try to use DW selectors */ + if (ctrl->allowed_full_dw > ctrl->used_full_dw) { + ctrl->full_dw_selector[ctrl->used_full_dw++] = cur_dw; + + ret = hws_definer_best_hl_fit_recu(ctrl, cur_dw + 1, data + 1); + if (ret) + return ret; + + ctrl->full_dw_selector[--ctrl->used_full_dw] = 0; + } + + /* No byte selector for offset bigger than 255 */ + if (cur_dw * DW_SIZE > 255) + return false; + + bytes_set = !!(0x000000ff & *data) + + !!(0x0000ff00 & *data) + + !!(0x00ff0000 & *data) + + !!(0xff000000 & *data); + + /* Check if there are enough byte selectors left */ + if (bytes_set + ctrl->used_bytes > ctrl->allowed_bytes) + return false; + + /* Try to use Byte selectors */ + for (i = 0; i < DW_SIZE; i++) + if ((0xff000000 >> (i * BITS_IN_BYTE)) & be32_to_cpu((__force __be32)*data)) { + /* Use byte selectors high to low */ + byte_idx = ctrl->allowed_bytes - ctrl->used_bytes - 1; + ctrl->byte_selector[byte_idx] = cur_dw * DW_SIZE + i; + ctrl->used_bytes++; + } + + ret = 
hws_definer_best_hl_fit_recu(ctrl, cur_dw + 1, data + 1); + if (ret) + return ret; + + for (i = 0; i < DW_SIZE; i++) + if ((0xff << (i * BITS_IN_BYTE)) & be32_to_cpu((__force __be32)*data)) { + ctrl->used_bytes--; + byte_idx = ctrl->allowed_bytes - ctrl->used_bytes - 1; + ctrl->byte_selector[byte_idx] = 0; + } + + return false; +} + +static void +hws_definer_copy_sel_ctrl(struct mlx5hws_definer_sel_ctrl *ctrl, + struct mlx5hws_definer *definer) +{ + memcpy(definer->byte_selector, ctrl->byte_selector, ctrl->allowed_bytes); + memcpy(definer->dw_selector, ctrl->full_dw_selector, ctrl->allowed_full_dw); + memcpy(definer->dw_selector + ctrl->allowed_full_dw, + ctrl->lim_dw_selector, ctrl->allowed_lim_dw); +} + +static int +hws_definer_find_best_match_fit(struct mlx5hws_context *ctx, + struct mlx5hws_definer *definer, + u8 *hl) +{ + struct mlx5hws_definer_sel_ctrl ctrl = {0}; + bool found; + + /* Try to create a match definer */ + ctrl.allowed_full_dw = DW_SELECTORS_MATCH; + ctrl.allowed_lim_dw = 0; + ctrl.allowed_bytes = BYTE_SELECTORS; + + found = hws_definer_best_hl_fit_recu(&ctrl, 0, (u32 *)hl); + if (found) { + hws_definer_copy_sel_ctrl(&ctrl, definer); + definer->type = MLX5HWS_DEFINER_TYPE_MATCH; + return 0; + } + + /* Try to create a full/limited jumbo definer */ + ctrl.allowed_full_dw = ctx->caps->full_dw_jumbo_support ? DW_SELECTORS : + DW_SELECTORS_MATCH; + ctrl.allowed_lim_dw = ctx->caps->full_dw_jumbo_support ? 0 : + DW_SELECTORS_LIMITED; + ctrl.allowed_bytes = BYTE_SELECTORS; + + found = hws_definer_best_hl_fit_recu(&ctrl, 0, (u32 *)hl); + if (found) { + hws_definer_copy_sel_ctrl(&ctrl, definer); + definer->type = MLX5HWS_DEFINER_TYPE_JUMBO; + return 0; + } + + return -E2BIG; +} + +static void +hws_definer_create_tag_mask(u32 *match_param, + struct mlx5hws_definer_fc *fc, + u32 fc_sz, + u8 *tag) +{ + u32 i; + + for (i = 0; i < fc_sz; i++) { + if (fc->tag_mask_set) + fc->tag_mask_set(fc, match_param, tag); + else + fc->tag_set(fc, match_param, tag); + fc++; + } +} + +void mlx5hws_definer_create_tag(u32 *match_param, + struct mlx5hws_definer_fc *fc, + u32 fc_sz, + u8 *tag) +{ + u32 i; + + for (i = 0; i < fc_sz; i++) { + fc->tag_set(fc, match_param, tag); + fc++; + } +} + +int mlx5hws_definer_get_id(struct mlx5hws_definer *definer) +{ + return definer->obj_id; +} + +int mlx5hws_definer_compare(struct mlx5hws_definer *definer_a, + struct mlx5hws_definer *definer_b) +{ + int i; + + /* Future: Optimize by comparing selectors with valid mask only */ + for (i = 0; i < BYTE_SELECTORS; i++) + if (definer_a->byte_selector[i] != definer_b->byte_selector[i]) + return 1; + + for (i = 0; i < DW_SELECTORS; i++) + if (definer_a->dw_selector[i] != definer_b->dw_selector[i]) + return 1; + + for (i = 0; i < MLX5HWS_JUMBO_TAG_SZ; i++) + if (definer_a->mask.jumbo[i] != definer_b->mask.jumbo[i]) + return 1; + + return 0; +} + +int +mlx5hws_definer_calc_layout(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt, + struct mlx5hws_definer *match_definer) +{ + u8 *match_hl; + int ret; + + /* Union header-layout (hl) is used for creating a single definer + * field layout used with different bitmasks for hash and match. + */ + match_hl = kzalloc(MLX5_ST_SZ_BYTES(definer_hl), GFP_KERNEL); + if (!match_hl) + return -ENOMEM; + + /* Convert all mt items to header layout (hl) + * and allocate the match field copy array (fc).
 + */ + ret = hws_definer_conv_match_params_to_hl(ctx, mt, match_hl); + if (ret) { + mlx5hws_err(ctx, "Failed to convert items to header layout\n"); + goto free_fc; + } + + /* Find the match definer layout for header layout match union */ + ret = hws_definer_find_best_match_fit(ctx, match_definer, match_hl); + if (ret) { + if (ret == -E2BIG) + mlx5hws_dbg(ctx, + "Failed to create match definer from header layout - E2BIG\n"); + else + mlx5hws_err(ctx, + "Failed to create match definer from header layout (%d)\n", + ret); + goto free_fc; + } + + kfree(match_hl); + return 0; + +free_fc: + kfree(mt->fc); + + kfree(match_hl); + return ret; +} + +int mlx5hws_definer_init_cache(struct mlx5hws_definer_cache **cache) +{ + struct mlx5hws_definer_cache *new_cache; + + new_cache = kzalloc(sizeof(*new_cache), GFP_KERNEL); + if (!new_cache) + return -ENOMEM; + + INIT_LIST_HEAD(&new_cache->list_head); + *cache = new_cache; + + return 0; +} + +void mlx5hws_definer_uninit_cache(struct mlx5hws_definer_cache *cache) +{ + kfree(cache); +} + +int mlx5hws_definer_get_obj(struct mlx5hws_context *ctx, + struct mlx5hws_definer *definer) +{ + struct mlx5hws_definer_cache *cache = ctx->definer_cache; + struct mlx5hws_cmd_definer_create_attr def_attr = {0}; + struct mlx5hws_definer_cache_item *cached_definer; + u32 obj_id; + int ret; + + /* Search definer cache for requested definer */ + list_for_each_entry(cached_definer, &cache->list_head, list_node) { + if (mlx5hws_definer_compare(&cached_definer->definer, definer)) + continue; + + /* Reuse definer and set LRU (move to be first in the list) */ + list_del_init(&cached_definer->list_node); + list_add(&cached_definer->list_node, &cache->list_head); + cached_definer->refcount++; + return cached_definer->definer.obj_id; + } + + /* Allocate and create definer based on the bitmask tag */ + def_attr.match_mask = definer->mask.jumbo; + def_attr.dw_selector = definer->dw_selector; + def_attr.byte_selector = definer->byte_selector; + + ret = mlx5hws_cmd_definer_create(ctx->mdev, &def_attr, &obj_id); + if (ret) + return -1; + + cached_definer = kzalloc(sizeof(*cached_definer), GFP_KERNEL); + if (!cached_definer) + goto free_definer_obj; + + memcpy(&cached_definer->definer, definer, sizeof(*definer)); + cached_definer->definer.obj_id = obj_id; + cached_definer->refcount = 1; + list_add(&cached_definer->list_node, &cache->list_head); + + return obj_id; + +free_definer_obj: + mlx5hws_cmd_definer_destroy(ctx->mdev, obj_id); + return -1; +} + +static void +hws_definer_put_obj(struct mlx5hws_context *ctx, u32 obj_id) +{ + struct mlx5hws_definer_cache_item *cached_definer; + + list_for_each_entry(cached_definer, &ctx->definer_cache->list_head, list_node) { + if (cached_definer->definer.obj_id != obj_id) + continue; + + /* Object found */ + if (--cached_definer->refcount) + return; + + list_del_init(&cached_definer->list_node); + mlx5hws_cmd_definer_destroy(ctx->mdev, cached_definer->definer.obj_id); + kfree(cached_definer); + return; + } + + /* Programming error, object must be part of cache */ + pr_warn("HWS: failed putting definer object\n"); +} + +static struct mlx5hws_definer * +hws_definer_alloc(struct mlx5hws_context *ctx, + struct mlx5hws_definer_fc *fc, + int fc_sz, + u32 *match_param, + struct mlx5hws_definer *layout, + bool bind_fc) +{ + struct mlx5hws_definer *definer; + int ret; + + definer = kmemdup(layout, sizeof(*definer), GFP_KERNEL); + if (!definer) + return NULL; + + /* Align field copy array based on given layout */ + if (bind_fc) { + ret =
hws_definer_fc_bind(definer, fc, fc_sz); + if (ret) { + mlx5hws_err(ctx, "Failed to bind field copy to definer\n"); + goto free_definer; + } + } + + /* Create the tag mask used for definer creation */ + hws_definer_create_tag_mask(match_param, fc, fc_sz, definer->mask.jumbo); + + ret = mlx5hws_definer_get_obj(ctx, definer); + if (ret < 0) + goto free_definer; + + definer->obj_id = ret; + return definer; + +free_definer: + kfree(definer); + return NULL; +} + +void mlx5hws_definer_free(struct mlx5hws_context *ctx, + struct mlx5hws_definer *definer) +{ + hws_definer_put_obj(ctx, definer->obj_id); + kfree(definer); +} + +static int +hws_definer_mt_match_init(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt, + struct mlx5hws_definer *match_layout) +{ + /* Create mandatory match definer */ + mt->definer = hws_definer_alloc(ctx, + mt->fc, + mt->fc_sz, + mt->match_param, + match_layout, + true); + if (!mt->definer) { + mlx5hws_err(ctx, "Failed to create match definer\n"); + return -EINVAL; + } + + return 0; +} + +static void +hws_definer_mt_match_uninit(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt) +{ + mlx5hws_definer_free(ctx, mt->definer); +} + +int mlx5hws_definer_mt_init(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt) +{ + struct mlx5hws_definer match_layout = {0}; + int ret; + + ret = mlx5hws_definer_calc_layout(ctx, mt, &match_layout); + if (ret) { + mlx5hws_err(ctx, "Failed to calculate matcher definer layout\n"); + return ret; + } + + /* Calculate definers needed for exact match */ + ret = hws_definer_mt_match_init(ctx, mt, &match_layout); + if (ret) { + mlx5hws_err(ctx, "Failed to init match definers\n"); + goto free_fc; + } + + return 0; + +free_fc: + kfree(mt->fc); + return ret; +} + +void mlx5hws_definer_mt_uninit(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt) +{ + hws_definer_mt_match_uninit(ctx, mt); + kfree(mt->fc); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.h new file mode 100644 index 000000000000..2f6a7df4021c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_definer.h @@ -0,0 +1,834 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_DEFINER_H_ +#define MLX5HWS_DEFINER_H_ + +/* Max available selectors */ +#define DW_SELECTORS 9 +#define BYTE_SELECTORS 8 + +/* Selectors based on match TAG */ +#define DW_SELECTORS_MATCH 6 +#define DW_SELECTORS_LIMITED 3 + +/* Selectors based on range TAG */ +#define DW_SELECTORS_RANGE 2 +#define BYTE_SELECTORS_RANGE 8 + +#define HWS_NUM_OF_FLEX_PARSERS 8 + +enum mlx5hws_definer_fname { + MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O, + MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I, + MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O, + MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I, + MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O, + MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I, + MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O, + MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I, + MLX5HWS_DEFINER_FNAME_ETH_TYPE_O, + MLX5HWS_DEFINER_FNAME_ETH_TYPE_I, + MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O, + MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I, + MLX5HWS_DEFINER_FNAME_VLAN_TYPE_O, + MLX5HWS_DEFINER_FNAME_VLAN_TYPE_I, + MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_O, + MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_I, + MLX5HWS_DEFINER_FNAME_VLAN_CFI_O, + MLX5HWS_DEFINER_FNAME_VLAN_CFI_I, + MLX5HWS_DEFINER_FNAME_VLAN_ID_O, +
MLX5HWS_DEFINER_FNAME_VLAN_ID_I, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_O, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_I, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_O, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_I, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_O, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_I, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_O, + MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_I, + MLX5HWS_DEFINER_FNAME_IPV4_IHL_O, + MLX5HWS_DEFINER_FNAME_IPV4_IHL_I, + MLX5HWS_DEFINER_FNAME_IP_DSCP_O, + MLX5HWS_DEFINER_FNAME_IP_DSCP_I, + MLX5HWS_DEFINER_FNAME_IP_ECN_O, + MLX5HWS_DEFINER_FNAME_IP_ECN_I, + MLX5HWS_DEFINER_FNAME_IP_TTL_O, + MLX5HWS_DEFINER_FNAME_IP_TTL_I, + MLX5HWS_DEFINER_FNAME_IPV4_DST_O, + MLX5HWS_DEFINER_FNAME_IPV4_DST_I, + MLX5HWS_DEFINER_FNAME_IPV4_SRC_O, + MLX5HWS_DEFINER_FNAME_IPV4_SRC_I, + MLX5HWS_DEFINER_FNAME_IP_VERSION_O, + MLX5HWS_DEFINER_FNAME_IP_VERSION_I, + MLX5HWS_DEFINER_FNAME_IP_FRAG_O, + MLX5HWS_DEFINER_FNAME_IP_FRAG_I, + MLX5HWS_DEFINER_FNAME_IP_LEN_O, + MLX5HWS_DEFINER_FNAME_IP_LEN_I, + MLX5HWS_DEFINER_FNAME_IP_TOS_O, + MLX5HWS_DEFINER_FNAME_IP_TOS_I, + MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_O, + MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_I, + MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O, + MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O, + MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O, + MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O, + MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I, + MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I, + MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I, + MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I, + MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I, + MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_O, + MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_I, + MLX5HWS_DEFINER_FNAME_L4_SPORT_O, + MLX5HWS_DEFINER_FNAME_L4_SPORT_I, + MLX5HWS_DEFINER_FNAME_L4_DPORT_O, + MLX5HWS_DEFINER_FNAME_L4_DPORT_I, + MLX5HWS_DEFINER_FNAME_TCP_FLAGS_I, + MLX5HWS_DEFINER_FNAME_TCP_FLAGS_O, + MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM, + MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM, + MLX5HWS_DEFINER_FNAME_GTP_TEID, + MLX5HWS_DEFINER_FNAME_GTP_MSG_TYPE, + MLX5HWS_DEFINER_FNAME_GTP_EXT_FLAG, + MLX5HWS_DEFINER_FNAME_GTP_NEXT_EXT_HDR, + MLX5HWS_DEFINER_FNAME_GTP_EXT_HDR_PDU, + MLX5HWS_DEFINER_FNAME_GTP_EXT_HDR_QFI, + MLX5HWS_DEFINER_FNAME_GTPU_DW0, + MLX5HWS_DEFINER_FNAME_GTPU_FIRST_EXT_DW0, + MLX5HWS_DEFINER_FNAME_GTPU_DW2, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7, + MLX5HWS_DEFINER_FNAME_VPORT_REG_C_0, + MLX5HWS_DEFINER_FNAME_VXLAN_FLAGS, + MLX5HWS_DEFINER_FNAME_VXLAN_VNI, + MLX5HWS_DEFINER_FNAME_VXLAN_GPE_FLAGS, + MLX5HWS_DEFINER_FNAME_VXLAN_GPE_RSVD0, + MLX5HWS_DEFINER_FNAME_VXLAN_GPE_PROTO, + MLX5HWS_DEFINER_FNAME_VXLAN_GPE_VNI, + MLX5HWS_DEFINER_FNAME_VXLAN_GPE_RSVD1, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_LEN, + MLX5HWS_DEFINER_FNAME_GENEVE_OAM, + MLX5HWS_DEFINER_FNAME_GENEVE_PROTO, + MLX5HWS_DEFINER_FNAME_GENEVE_VNI, + MLX5HWS_DEFINER_FNAME_SOURCE_QP, + MLX5HWS_DEFINER_FNAME_SOURCE_GVMI, + MLX5HWS_DEFINER_FNAME_REG_0, + MLX5HWS_DEFINER_FNAME_REG_1, + MLX5HWS_DEFINER_FNAME_REG_2, + MLX5HWS_DEFINER_FNAME_REG_3, + 
MLX5HWS_DEFINER_FNAME_REG_4, + MLX5HWS_DEFINER_FNAME_REG_5, + MLX5HWS_DEFINER_FNAME_REG_6, + MLX5HWS_DEFINER_FNAME_REG_7, + MLX5HWS_DEFINER_FNAME_REG_8, + MLX5HWS_DEFINER_FNAME_REG_9, + MLX5HWS_DEFINER_FNAME_REG_10, + MLX5HWS_DEFINER_FNAME_REG_11, + MLX5HWS_DEFINER_FNAME_REG_A, + MLX5HWS_DEFINER_FNAME_REG_B, + MLX5HWS_DEFINER_FNAME_GRE_KEY_PRESENT, + MLX5HWS_DEFINER_FNAME_GRE_C, + MLX5HWS_DEFINER_FNAME_GRE_K, + MLX5HWS_DEFINER_FNAME_GRE_S, + MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL, + MLX5HWS_DEFINER_FNAME_GRE_OPT_KEY, + MLX5HWS_DEFINER_FNAME_GRE_OPT_SEQ, + MLX5HWS_DEFINER_FNAME_GRE_OPT_CHECKSUM, + MLX5HWS_DEFINER_FNAME_INTEGRITY_O, + MLX5HWS_DEFINER_FNAME_INTEGRITY_I, + MLX5HWS_DEFINER_FNAME_ICMP_DW1, + MLX5HWS_DEFINER_FNAME_ICMP_DW2, + MLX5HWS_DEFINER_FNAME_ICMP_DW3, + MLX5HWS_DEFINER_FNAME_IPSEC_SPI, + MLX5HWS_DEFINER_FNAME_IPSEC_SEQUENCE_NUMBER, + MLX5HWS_DEFINER_FNAME_IPSEC_SYNDROME, + MLX5HWS_DEFINER_FNAME_MPLS0_O, + MLX5HWS_DEFINER_FNAME_MPLS1_O, + MLX5HWS_DEFINER_FNAME_MPLS2_O, + MLX5HWS_DEFINER_FNAME_MPLS3_O, + MLX5HWS_DEFINER_FNAME_MPLS4_O, + MLX5HWS_DEFINER_FNAME_MPLS0_I, + MLX5HWS_DEFINER_FNAME_MPLS1_I, + MLX5HWS_DEFINER_FNAME_MPLS2_I, + MLX5HWS_DEFINER_FNAME_MPLS3_I, + MLX5HWS_DEFINER_FNAME_MPLS4_I, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK, + MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS0_O, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS1_O, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS2_O, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS3_O, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS4_O, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS0_I, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS1_I, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS2_I, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS3_I, + MLX5HWS_DEFINER_FNAME_OKS2_MPLS4_I, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_0, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_1, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_2, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_3, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_4, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_5, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_6, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_OK_7, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_0, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_1, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_2, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_3, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_4, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_5, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_6, + MLX5HWS_DEFINER_FNAME_GENEVE_OPT_DW_7, + MLX5HWS_DEFINER_FNAME_IB_L4_OPCODE, + MLX5HWS_DEFINER_FNAME_IB_L4_QPN, + MLX5HWS_DEFINER_FNAME_IB_L4_A, + MLX5HWS_DEFINER_FNAME_RANDOM_NUM, + MLX5HWS_DEFINER_FNAME_PTYPE_L2_O, + MLX5HWS_DEFINER_FNAME_PTYPE_L2_I, + MLX5HWS_DEFINER_FNAME_PTYPE_L3_O, + MLX5HWS_DEFINER_FNAME_PTYPE_L3_I, + MLX5HWS_DEFINER_FNAME_PTYPE_L4_O, + MLX5HWS_DEFINER_FNAME_PTYPE_L4_I, + MLX5HWS_DEFINER_FNAME_PTYPE_L4_EXT_O, + MLX5HWS_DEFINER_FNAME_PTYPE_L4_EXT_I, + MLX5HWS_DEFINER_FNAME_PTYPE_FRAG_O, + MLX5HWS_DEFINER_FNAME_PTYPE_FRAG_I, + MLX5HWS_DEFINER_FNAME_TNL_HDR_0, + MLX5HWS_DEFINER_FNAME_TNL_HDR_1, + MLX5HWS_DEFINER_FNAME_TNL_HDR_2, + MLX5HWS_DEFINER_FNAME_TNL_HDR_3, + MLX5HWS_DEFINER_FNAME_MAX, +}; + +enum mlx5hws_definer_match_criteria { + MLX5HWS_DEFINER_MATCH_CRITERIA_EMPTY = 0, + MLX5HWS_DEFINER_MATCH_CRITERIA_OUTER = 1 << 0, + MLX5HWS_DEFINER_MATCH_CRITERIA_MISC = 1 << 1, + MLX5HWS_DEFINER_MATCH_CRITERIA_INNER = 1 << 2, + MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2 = 1 << 
3, + MLX5HWS_DEFINER_MATCH_CRITERIA_MISC3 = 1 << 4, + MLX5HWS_DEFINER_MATCH_CRITERIA_MISC4 = 1 << 5, + MLX5HWS_DEFINER_MATCH_CRITERIA_MISC5 = 1 << 6, + MLX5HWS_DEFINER_MATCH_CRITERIA_MISC6 = 1 << 7, +}; + +enum mlx5hws_definer_type { + MLX5HWS_DEFINER_TYPE_MATCH, + MLX5HWS_DEFINER_TYPE_JUMBO, +}; + +enum mlx5hws_definer_match_flag { + MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN_GPE = 1 << 0, + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GENEVE = 1 << 1, + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GTPU = 1 << 2, + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE = 1 << 3, + MLX5HWS_DEFINER_MATCH_FLAG_TNL_VXLAN = 1 << 4, + MLX5HWS_DEFINER_MATCH_FLAG_TNL_HEADER_0_1 = 1 << 5, + + MLX5HWS_DEFINER_MATCH_FLAG_TNL_GRE_OPT_KEY = 1 << 6, + MLX5HWS_DEFINER_MATCH_FLAG_TNL_HEADER_2 = 1 << 7, + + MLX5HWS_DEFINER_MATCH_FLAG_TNL_MPLS_OVER_GRE = 1 << 8, + MLX5HWS_DEFINER_MATCH_FLAG_TNL_MPLS_OVER_UDP = 1 << 9, + + MLX5HWS_DEFINER_MATCH_FLAG_ICMPV4 = 1 << 10, + MLX5HWS_DEFINER_MATCH_FLAG_ICMPV6 = 1 << 11, + MLX5HWS_DEFINER_MATCH_FLAG_TCP_O = 1 << 12, + MLX5HWS_DEFINER_MATCH_FLAG_TCP_I = 1 << 13, +}; + +struct mlx5hws_definer_fc { + struct mlx5hws_context *ctx; + /* Source */ + u32 s_byte_off; + int s_bit_off; + u32 s_bit_mask; + /* Destination */ + u32 byte_off; + int bit_off; + u32 bit_mask; + enum mlx5hws_definer_fname fname; + void (*tag_set)(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag); + void (*tag_mask_set)(struct mlx5hws_definer_fc *fc, + void *match_param, + u8 *tag); +}; + +struct mlx5_ifc_definer_hl_eth_l2_bits { + u8 dmac_47_16[0x20]; + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + u8 reserved_at_40[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 ip_fragmented[0x1]; + u8 qp_type[0x2]; + u8 encap_type[0x2]; + u8 port_number[0x2]; + u8 l3_type[0x2]; + u8 l4_type_bwc[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + u8 l4_type[0x4]; + u8 reserved_at_64[0x2]; + u8 ipsec_layer[0x2]; + u8 l2_type[0x2]; + u8 force_lb[0x1]; + u8 l2_ok[0x1]; + u8 l3_ok[0x1]; + u8 l4_ok[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_definer_hl_eth_l2_src_bits { + u8 smac_47_16[0x20]; + u8 smac_15_0[0x10]; + u8 loopback_syndrome[0x8]; + u8 l3_type[0x2]; + u8 l4_type_bwc[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 ip_fragmented[0x1]; + u8 functional_lb[0x1]; +}; + +struct mlx5_ifc_definer_hl_ib_l2_bits { + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 reserved_at_3[0x3]; + u8 port_number[0x2]; + u8 sl[0x4]; + u8 qp_type[0x2]; + u8 lnh[0x2]; + u8 dlid[0x10]; + u8 vl[0x4]; + u8 lrh_packet_length[0xc]; + u8 slid[0x10]; +}; + +struct mlx5_ifc_definer_hl_eth_l3_bits { + u8 ip_version[0x4]; + u8 ihl[0x4]; + union { + u8 tos[0x8]; + struct { + u8 dscp[0x6]; + u8 ecn[0x2]; + }; + }; + u8 time_to_live_hop_limit[0x8]; + u8 protocol_next_header[0x8]; + u8 identification[0x10]; + union { + u8 ipv4_frag[0x10]; + struct { + u8 flags[0x3]; + u8 fragment_offset[0xd]; + }; + }; + u8 ipv4_total_length[0x10]; + u8 checksum[0x10]; + u8 reserved_at_60[0xc]; + u8 flow_label[0x14]; + u8 packet_length[0x10]; + u8 ipv6_payload_length[0x10]; +}; + +struct mlx5_ifc_definer_hl_eth_l4_bits { + u8 source_port[0x10]; + u8 destination_port[0x10]; + u8 data_offset[0x4]; + u8 l4_ok[0x1]; + u8 l3_ok[0x1]; + u8 ip_fragmented[0x1]; + u8 tcp_ns[0x1]; + union { + u8 tcp_flags[0x8]; + struct { + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8
tcp_syn[0x1]; + u8 tcp_fin[0x1]; + }; + }; + u8 first_fragment[0x1]; + u8 reserved_at_31[0xf]; +}; + +struct mlx5_ifc_definer_hl_src_qp_gvmi_bits { + u8 loopback_syndrome[0x8]; + u8 l3_type[0x2]; + u8 l4_type_bwc[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_e[0x1]; + u8 functional_lb[0x1]; + u8 source_gvmi[0x10]; + u8 force_lb[0x1]; + u8 ip_fragmented[0x1]; + u8 source_is_requestor[0x1]; + u8 reserved_at_23[0x5]; + u8 source_qp[0x18]; +}; + +struct mlx5_ifc_definer_hl_ib_l4_bits { + u8 opcode[0x8]; + u8 qp[0x18]; + u8 se[0x1]; + u8 migreq[0x1]; + u8 ackreq[0x1]; + u8 fecn[0x1]; + u8 becn[0x1]; + u8 bth[0x1]; + u8 deth[0x1]; + u8 dcceth[0x1]; + u8 reserved_at_28[0x2]; + u8 pad_count[0x2]; + u8 tver[0x4]; + u8 p_key[0x10]; + u8 reserved_at_40[0x8]; + u8 deth_source_qp[0x18]; +}; + +enum mlx5hws_integrity_ok1_bits { + MLX5HWS_DEFINER_OKS1_FIRST_L4_OK = 24, + MLX5HWS_DEFINER_OKS1_FIRST_L3_OK = 25, + MLX5HWS_DEFINER_OKS1_SECOND_L4_OK = 26, + MLX5HWS_DEFINER_OKS1_SECOND_L3_OK = 27, + MLX5HWS_DEFINER_OKS1_FIRST_L4_CSUM_OK = 28, + MLX5HWS_DEFINER_OKS1_FIRST_IPV4_CSUM_OK = 29, + MLX5HWS_DEFINER_OKS1_SECOND_L4_CSUM_OK = 30, + MLX5HWS_DEFINER_OKS1_SECOND_IPV4_CSUM_OK = 31, +}; + +struct mlx5_ifc_definer_hl_oks1_bits { + union { + u8 oks1_bits[0x20]; + struct { + u8 second_ipv4_checksum_ok[0x1]; + u8 second_l4_checksum_ok[0x1]; + u8 first_ipv4_checksum_ok[0x1]; + u8 first_l4_checksum_ok[0x1]; + u8 second_l3_ok[0x1]; + u8 second_l4_ok[0x1]; + u8 first_l3_ok[0x1]; + u8 first_l4_ok[0x1]; + u8 flex_parser7_steering_ok[0x1]; + u8 flex_parser6_steering_ok[0x1]; + u8 flex_parser5_steering_ok[0x1]; + u8 flex_parser4_steering_ok[0x1]; + u8 flex_parser3_steering_ok[0x1]; + u8 flex_parser2_steering_ok[0x1]; + u8 flex_parser1_steering_ok[0x1]; + u8 flex_parser0_steering_ok[0x1]; + u8 second_ipv6_extension_header_vld[0x1]; + u8 first_ipv6_extension_header_vld[0x1]; + u8 l3_tunneling_ok[0x1]; + u8 l2_tunneling_ok[0x1]; + u8 second_tcp_ok[0x1]; + u8 second_udp_ok[0x1]; + u8 second_ipv4_ok[0x1]; + u8 second_ipv6_ok[0x1]; + u8 second_l2_ok[0x1]; + u8 vxlan_ok[0x1]; + u8 gre_ok[0x1]; + u8 first_tcp_ok[0x1]; + u8 first_udp_ok[0x1]; + u8 first_ipv4_ok[0x1]; + u8 first_ipv6_ok[0x1]; + u8 first_l2_ok[0x1]; + }; + }; +}; + +struct mlx5_ifc_definer_hl_oks2_bits { + u8 reserved_at_0[0xa]; + u8 second_mpls_ok[0x1]; + u8 second_mpls4_s_bit[0x1]; + u8 second_mpls4_qualifier[0x1]; + u8 second_mpls3_s_bit[0x1]; + u8 second_mpls3_qualifier[0x1]; + u8 second_mpls2_s_bit[0x1]; + u8 second_mpls2_qualifier[0x1]; + u8 second_mpls1_s_bit[0x1]; + u8 second_mpls1_qualifier[0x1]; + u8 second_mpls0_s_bit[0x1]; + u8 second_mpls0_qualifier[0x1]; + u8 first_mpls_ok[0x1]; + u8 first_mpls4_s_bit[0x1]; + u8 first_mpls4_qualifier[0x1]; + u8 first_mpls3_s_bit[0x1]; + u8 first_mpls3_qualifier[0x1]; + u8 first_mpls2_s_bit[0x1]; + u8 first_mpls2_qualifier[0x1]; + u8 first_mpls1_s_bit[0x1]; + u8 first_mpls1_qualifier[0x1]; + u8 first_mpls0_s_bit[0x1]; + u8 first_mpls0_qualifier[0x1]; +}; + +struct mlx5_ifc_definer_hl_voq_bits { + u8 reserved_at_0[0x18]; + u8 ecn_ok[0x1]; + u8 congestion[0x1]; + u8 profile[0x2]; + u8 internal_prio[0x4]; +}; + +struct mlx5_ifc_definer_hl_ipv4_src_dst_bits { + u8 source_address[0x20]; + u8 destination_address[0x20]; +}; + +struct mlx5_ifc_definer_hl_random_number_bits { + u8 random_number[0x10]; + u8 reserved[0x10]; +}; + +struct mlx5_ifc_definer_hl_ipv6_addr_bits { + u8 ipv6_address_127_96[0x20]; + u8 ipv6_address_95_64[0x20]; + u8 ipv6_address_63_32[0x20]; + u8 ipv6_address_31_0[0x20]; +}; + +struct 
mlx5_ifc_definer_tcp_icmp_header_bits { + union { + struct { + u8 icmp_dw1[0x20]; + u8 icmp_dw2[0x20]; + u8 icmp_dw3[0x20]; + }; + struct { + u8 tcp_seq[0x20]; + u8 tcp_ack[0x20]; + u8 tcp_win_urg[0x20]; + }; + }; +}; + +struct mlx5_ifc_definer_hl_tunnel_header_bits { + u8 tunnel_header_0[0x20]; + u8 tunnel_header_1[0x20]; + u8 tunnel_header_2[0x20]; + u8 tunnel_header_3[0x20]; +}; + +struct mlx5_ifc_definer_hl_ipsec_bits { + u8 spi[0x20]; + u8 sequence_number[0x20]; + u8 reserved[0x10]; + u8 ipsec_syndrome[0x8]; + u8 next_header[0x8]; +}; + +struct mlx5_ifc_definer_hl_metadata_bits { + u8 metadata_to_cqe[0x20]; + u8 general_purpose[0x20]; + u8 acomulated_hash[0x20]; +}; + +struct mlx5_ifc_definer_hl_flex_parser_bits { + u8 flex_parser_7[0x20]; + u8 flex_parser_6[0x20]; + u8 flex_parser_5[0x20]; + u8 flex_parser_4[0x20]; + u8 flex_parser_3[0x20]; + u8 flex_parser_2[0x20]; + u8 flex_parser_1[0x20]; + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_definer_hl_registers_bits { + u8 register_c_10[0x20]; + u8 register_c_11[0x20]; + u8 register_c_8[0x20]; + u8 register_c_9[0x20]; + u8 register_c_6[0x20]; + u8 register_c_7[0x20]; + u8 register_c_4[0x20]; + u8 register_c_5[0x20]; + u8 register_c_2[0x20]; + u8 register_c_3[0x20]; + u8 register_c_0[0x20]; + u8 register_c_1[0x20]; +}; + +struct mlx5_ifc_definer_hl_mpls_bits { + u8 mpls0_label[0x20]; + u8 mpls1_label[0x20]; + u8 mpls2_label[0x20]; + u8 mpls3_label[0x20]; + u8 mpls4_label[0x20]; +}; + +struct mlx5_ifc_definer_hl_bits { + struct mlx5_ifc_definer_hl_eth_l2_bits eth_l2_outer; + struct mlx5_ifc_definer_hl_eth_l2_bits eth_l2_inner; + struct mlx5_ifc_definer_hl_eth_l2_src_bits eth_l2_src_outer; + struct mlx5_ifc_definer_hl_eth_l2_src_bits eth_l2_src_inner; + struct mlx5_ifc_definer_hl_ib_l2_bits ib_l2; + struct mlx5_ifc_definer_hl_eth_l3_bits eth_l3_outer; + struct mlx5_ifc_definer_hl_eth_l3_bits eth_l3_inner; + struct mlx5_ifc_definer_hl_eth_l4_bits eth_l4_outer; + struct mlx5_ifc_definer_hl_eth_l4_bits eth_l4_inner; + struct mlx5_ifc_definer_hl_src_qp_gvmi_bits source_qp_gvmi; + struct mlx5_ifc_definer_hl_ib_l4_bits ib_l4; + struct mlx5_ifc_definer_hl_oks1_bits oks1; + struct mlx5_ifc_definer_hl_oks2_bits oks2; + struct mlx5_ifc_definer_hl_voq_bits voq; + u8 reserved_at_480[0x380]; + struct mlx5_ifc_definer_hl_ipv4_src_dst_bits ipv4_src_dest_outer; + struct mlx5_ifc_definer_hl_ipv4_src_dst_bits ipv4_src_dest_inner; + struct mlx5_ifc_definer_hl_ipv6_addr_bits ipv6_dst_outer; + struct mlx5_ifc_definer_hl_ipv6_addr_bits ipv6_dst_inner; + struct mlx5_ifc_definer_hl_ipv6_addr_bits ipv6_src_outer; + struct mlx5_ifc_definer_hl_ipv6_addr_bits ipv6_src_inner; + u8 unsupported_dest_ib_l3[0x80]; + u8 unsupported_source_ib_l3[0x80]; + u8 unsupported_udp_misc_outer[0x20]; + u8 unsupported_udp_misc_inner[0x20]; + struct mlx5_ifc_definer_tcp_icmp_header_bits tcp_icmp; + struct mlx5_ifc_definer_hl_tunnel_header_bits tunnel_header; + struct mlx5_ifc_definer_hl_mpls_bits mpls_outer; + struct mlx5_ifc_definer_hl_mpls_bits mpls_inner; + u8 unsupported_config_headers_outer[0x80]; + u8 unsupported_config_headers_inner[0x80]; + struct mlx5_ifc_definer_hl_random_number_bits random_number; + struct mlx5_ifc_definer_hl_ipsec_bits ipsec; + struct mlx5_ifc_definer_hl_metadata_bits metadata; + u8 unsupported_utc_timestamp[0x40]; + u8 unsupported_free_running_timestamp[0x40]; + struct mlx5_ifc_definer_hl_flex_parser_bits flex_parser; + struct mlx5_ifc_definer_hl_registers_bits registers; + /* Reserved in case header layout on future HW */ + u8 
unsupported_reserved[0xd40]; +}; + +enum mlx5hws_definer_gtp { + MLX5HWS_DEFINER_GTP_EXT_HDR_BIT = 0x04, +}; + +struct mlx5_ifc_header_gtp_bits { + u8 version[0x3]; + u8 proto_type[0x1]; + u8 reserved1[0x1]; + union { + u8 msg_flags[0x3]; + struct { + u8 ext_hdr_flag[0x1]; + u8 seq_num_flag[0x1]; + u8 pdu_flag[0x1]; + }; + }; + u8 msg_type[0x8]; + u8 msg_len[0x8]; + u8 teid[0x20]; +}; + +struct mlx5_ifc_header_opt_gtp_bits { + u8 seq_num[0x10]; + u8 pdu_num[0x8]; + u8 next_ext_hdr_type[0x8]; +}; + +struct mlx5_ifc_header_gtp_psc_bits { + u8 len[0x8]; + u8 pdu_type[0x4]; + u8 flags[0x4]; + u8 qfi[0x8]; + u8 reserved2[0x8]; +}; + +struct mlx5_ifc_header_ipv6_vtc_bits { + u8 version[0x4]; + union { + u8 tos[0x8]; + struct { + u8 dscp[0x6]; + u8 ecn[0x2]; + }; + }; + u8 flow_label[0x14]; +}; + +struct mlx5_ifc_header_ipv6_routing_ext_bits { + u8 next_hdr[0x8]; + u8 hdr_len[0x8]; + u8 type[0x8]; + u8 segments_left[0x8]; + union { + u8 flags[0x20]; + struct { + u8 last_entry[0x8]; + u8 flag[0x8]; + u8 tag[0x10]; + }; + }; +}; + +struct mlx5_ifc_header_vxlan_bits { + u8 flags[0x8]; + u8 reserved1[0x18]; + u8 vni[0x18]; + u8 reserved2[0x8]; +}; + +struct mlx5_ifc_header_vxlan_gpe_bits { + u8 flags[0x8]; + u8 rsvd0[0x10]; + u8 protocol[0x8]; + u8 vni[0x18]; + u8 rsvd1[0x8]; +}; + +struct mlx5_ifc_header_gre_bits { + union { + u8 c_rsvd0_ver[0x10]; + struct { + u8 gre_c_present[0x1]; + u8 reserved_at_1[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 reserved_at_4[0x9]; + u8 version[0x3]; + }; + }; + u8 gre_protocol[0x10]; + u8 checksum[0x10]; + u8 reserved_at_30[0x10]; +}; + +struct mlx5_ifc_header_geneve_bits { + union { + u8 ver_opt_len_o_c_rsvd[0x10]; + struct { + u8 version[0x2]; + u8 opt_len[0x6]; + u8 o_flag[0x1]; + u8 c_flag[0x1]; + u8 reserved_at_a[0x6]; + }; + }; + u8 protocol_type[0x10]; + u8 vni[0x18]; + u8 reserved_at_38[0x8]; +}; + +struct mlx5_ifc_header_geneve_opt_bits { + u8 class[0x10]; + u8 type[0x8]; + u8 reserved[0x3]; + u8 len[0x5]; +}; + +struct mlx5_ifc_header_icmp_bits { + union { + u8 icmp_dw1[0x20]; + struct { + u8 type[0x8]; + u8 code[0x8]; + u8 cksum[0x10]; + }; + }; + union { + u8 icmp_dw2[0x20]; + struct { + u8 ident[0x10]; + u8 seq_nb[0x10]; + }; + }; +}; + +struct mlx5hws_definer { + enum mlx5hws_definer_type type; + u8 dw_selector[DW_SELECTORS]; + u8 byte_selector[BYTE_SELECTORS]; + struct mlx5hws_rule_match_tag mask; + u32 obj_id; +}; + +struct mlx5hws_definer_cache { + struct list_head list_head; +}; + +struct mlx5hws_definer_cache_item { + struct mlx5hws_definer definer; + u32 refcount; + struct list_head list_node; +}; + +static inline bool +mlx5hws_definer_is_jumbo(struct mlx5hws_definer *definer) +{ + return (definer->type == MLX5HWS_DEFINER_TYPE_JUMBO); +} + +void mlx5hws_definer_create_tag(u32 *match_param, + struct mlx5hws_definer_fc *fc, + u32 fc_sz, + u8 *tag); + +int mlx5hws_definer_get_id(struct mlx5hws_definer *definer); + +int mlx5hws_definer_mt_init(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt); + +void mlx5hws_definer_mt_uninit(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt); + +int mlx5hws_definer_init_cache(struct mlx5hws_definer_cache **cache); + +void mlx5hws_definer_uninit_cache(struct mlx5hws_definer_cache *cache); + +int mlx5hws_definer_compare(struct mlx5hws_definer *definer_a, + struct mlx5hws_definer *definer_b); + +int mlx5hws_definer_get_obj(struct mlx5hws_context *ctx, + struct mlx5hws_definer *definer); + +void mlx5hws_definer_free(struct mlx5hws_context *ctx, + struct mlx5hws_definer 
*definer); + +int mlx5hws_definer_calc_layout(struct mlx5hws_context *ctx, + struct mlx5hws_match_template *mt, + struct mlx5hws_definer *match_definer); + +struct mlx5hws_definer_fc * +mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx, + u8 match_criteria_enable, + u32 *match_param, + int *fc_sz); + +#endif /* MLX5HWS_DEFINER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_internal.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_internal.h new file mode 100644 index 000000000000..5643be1cd5bf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_internal.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_INTERNAL_H_ +#define MLX5HWS_INTERNAL_H_ + +#include <linux/mlx5/transobj.h> +#include <linux/mlx5/vport.h> +#include "fs_core.h" +#include "wq.h" +#include "lib/mlx5.h" + +#include "mlx5hws_prm.h" +#include "mlx5hws.h" +#include "mlx5hws_pool.h" +#include "mlx5hws_vport.h" +#include "mlx5hws_context.h" +#include "mlx5hws_table.h" +#include "mlx5hws_send.h" +#include "mlx5hws_rule.h" +#include "mlx5hws_cmd.h" +#include "mlx5hws_action.h" +#include "mlx5hws_definer.h" +#include "mlx5hws_matcher.h" +#include "mlx5hws_debug.h" +#include "mlx5hws_pat_arg.h" +#include "mlx5hws_bwc.h" +#include "mlx5hws_bwc_complex.h" + +#define W_SIZE 2 +#define DW_SIZE 4 +#define BITS_IN_BYTE 8 +#define BITS_IN_DW (BITS_IN_BYTE * DW_SIZE) + +#define IS_BIT_SET(_value, _bit) ((_value) & (1ULL << (_bit))) + +#define mlx5hws_err(ctx, arg...) mlx5_core_err((ctx)->mdev, ##arg) +#define mlx5hws_info(ctx, arg...) mlx5_core_info((ctx)->mdev, ##arg) +#define mlx5hws_dbg(ctx, arg...) 
mlx5_core_dbg((ctx)->mdev, ##arg) + +#define MLX5HWS_TABLE_TYPE_BASE 2 +#define MLX5HWS_ACTION_STE_IDX_ANY 0 + +static inline bool is_mem_zero(const u8 *mem, size_t size) +{ + if (unlikely(!size)) { + pr_warn("HWS: invalid buffer of size 0 in %s\n", __func__); + return true; + } + + return (*mem == 0) && memcmp(mem, mem + 1, size - 1) == 0; +} + +static inline unsigned long align(unsigned long val, unsigned long align) +{ + return (val + align - 1) & ~(align - 1); +} + +#endif /* MLX5HWS_INTERNAL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c new file mode 100644 index 000000000000..33d2b31e4b46 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c @@ -0,0 +1,1216 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" + +enum mlx5hws_matcher_rtc_type { + HWS_MATCHER_RTC_TYPE_MATCH, + HWS_MATCHER_RTC_TYPE_STE_ARRAY, + HWS_MATCHER_RTC_TYPE_MAX, +}; + +static const char * const mlx5hws_matcher_rtc_type_str[] = { + [HWS_MATCHER_RTC_TYPE_MATCH] = "MATCH", + [HWS_MATCHER_RTC_TYPE_STE_ARRAY] = "STE_ARRAY", + [HWS_MATCHER_RTC_TYPE_MAX] = "UNKNOWN", +}; + +static const char *hws_matcher_rtc_type_to_str(enum mlx5hws_matcher_rtc_type rtc_type) +{ + if (rtc_type > HWS_MATCHER_RTC_TYPE_MAX) + rtc_type = HWS_MATCHER_RTC_TYPE_MAX; + return mlx5hws_matcher_rtc_type_str[rtc_type]; +} + +static bool hws_matcher_requires_col_tbl(u8 log_num_of_rules) +{ + /* Collision table concatenation is done only for large rule tables */ + return log_num_of_rules > MLX5HWS_MATCHER_ASSURED_RULES_TH; +} + +static u8 hws_matcher_rules_to_tbl_depth(u8 log_num_of_rules) +{ + if (hws_matcher_requires_col_tbl(log_num_of_rules)) + return MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH; + + /* For small rule tables we use a single deep table to assure insertion */ + return min(log_num_of_rules, MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH); +} + +static void hws_matcher_destroy_end_ft(struct mlx5hws_matcher *matcher) +{ + mlx5hws_table_destroy_default_ft(matcher->tbl, matcher->end_ft_id); +} + +static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + int ret; + + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &matcher->end_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed to create matcher end flow table\n"); + return ret; + } + return 0; +} + +static int hws_matcher_connect(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + struct mlx5hws_matcher *prev = NULL; + struct mlx5hws_matcher *next = NULL; + struct mlx5hws_matcher *tmp_matcher; + int ret; + + /* Find location in matcher list */ + if (list_empty(&tbl->matchers_list)) { + list_add(&matcher->list_node, &tbl->matchers_list); + goto connect; + } + + list_for_each_entry(tmp_matcher, &tbl->matchers_list, list_node) { + if (tmp_matcher->attr.priority > matcher->attr.priority) { + next = tmp_matcher; + break; + } + prev = tmp_matcher; + } + + if (next) + /* insert before next */ + list_add_tail(&matcher->list_node, &next->list_node); + else + /* insert after prev */ + list_add(&matcher->list_node, &prev->list_node); + +connect: + if (next) { + /* Connect to next RTC */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + matcher->end_ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); 
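	/*
	 * Matchers in a table form a priority-ordered chain: packets enter
	 * through the table's flow table, walk each matcher's RTC pair, and
	 * fall through that matcher's end FT to the next matcher or to the
	 * table miss. A sketch of the chain being stitched here (M0..Mn are
	 * the matchers already on tbl->matchers_list):
	 *
	 *   tbl->ft_id -> M0 RTCs -> M0 end_ft -> ... -> Mn end_ft -> miss
	 *
	 * The call above wired this matcher's end FT to the next matcher's
	 * RTCs; the error handling below unwinds the list insertion on
	 * failure.
	 */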
+ if (ret) { + mlx5hws_err(ctx, "Failed to connect new matcher to next RTC\n"); + goto remove_from_list; + } + } else { + /* Connect last matcher to next miss_tbl if exists */ + ret = mlx5hws_table_connect_to_miss_table(tbl, tbl->default_miss.miss_tbl); + if (ret) { + mlx5hws_err(ctx, "Failed connect new matcher to miss_tbl\n"); + goto remove_from_list; + } + } + + /* Connect to previous FT */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + prev ? prev->end_ft_id : tbl->ft_id, + tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(ctx, "Failed to connect new matcher to previous FT\n"); + goto remove_from_list; + } + + /* Reset prev matcher FT default miss (drop refcount) */ + ret = mlx5hws_table_ft_set_default_next_ft(tbl, prev ? prev->end_ft_id : tbl->ft_id); + if (ret) { + mlx5hws_err(ctx, "Failed to reset matcher ft default miss\n"); + goto remove_from_list; + } + + if (!prev) { + /* Update tables missing to current matcher in the table */ + ret = mlx5hws_table_update_connected_miss_tables(tbl); + if (ret) { + mlx5hws_err(ctx, "Fatal error, failed to update connected miss table\n"); + goto remove_from_list; + } + } + + return 0; + +remove_from_list: + list_del_init(&matcher->list_node); + return ret; +} + +static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher *next = NULL, *prev = NULL; + struct mlx5hws_table *tbl = matcher->tbl; + u32 prev_ft_id = tbl->ft_id; + int ret; + + if (!list_is_first(&matcher->list_node, &tbl->matchers_list)) { + prev = list_prev_entry(matcher, list_node); + prev_ft_id = prev->end_ft_id; + } + + if (!list_is_last(&matcher->list_node, &tbl->matchers_list)) + next = list_next_entry(matcher, list_node); + + list_del_init(&matcher->list_node); + + if (next) { + /* Connect previous end FT to next RTC */ + ret = mlx5hws_table_ft_set_next_rtc(tbl->ctx, + prev_ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed to disconnect matcher\n"); + goto matcher_reconnect; + } + } else { + ret = mlx5hws_table_connect_to_miss_table(tbl, tbl->default_miss.miss_tbl); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed to disconnect last matcher\n"); + goto matcher_reconnect; + } + } + + /* Removing first matcher, update connected miss tables if exists */ + if (prev_ft_id == tbl->ft_id) { + ret = mlx5hws_table_update_connected_miss_tables(tbl); + if (ret) { + mlx5hws_err(tbl->ctx, "Fatal error, failed to update connected miss table\n"); + goto matcher_reconnect; + } + } + + ret = mlx5hws_table_ft_set_default_next_ft(tbl, prev_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Fatal error, failed to restore matcher ft default miss\n"); + goto matcher_reconnect; + } + + return 0; + +matcher_reconnect: + if (list_empty(&tbl->matchers_list) || !prev) + list_add(&matcher->list_node, &tbl->matchers_list); + else + /* insert after prev matcher */ + list_add(&matcher->list_node, &prev->list_node); + + return ret; +} + +static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher, + struct mlx5hws_cmd_rtc_create_attr *rtc_attr, + enum mlx5hws_matcher_rtc_type rtc_type, + bool is_mirror) +{ + struct mlx5hws_pool_chunk *ste = &matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].ste; + enum mlx5hws_matcher_flow_src flow_src = matcher->attr.optimize_flow_src; + bool is_match_rtc = rtc_type == HWS_MATCHER_RTC_TYPE_MATCH; + + if ((flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT && !is_mirror) || + (flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE 
&& is_mirror)) { + /* Optimize FDB RTC */ + rtc_attr->log_size = 0; + rtc_attr->log_depth = 0; + } else { + /* Keep original values */ + rtc_attr->log_size = is_match_rtc ? matcher->attr.table.sz_row_log : ste->order; + rtc_attr->log_depth = is_match_rtc ? matcher->attr.table.sz_col_log : 0; + } +} + +static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, + enum mlx5hws_matcher_rtc_type rtc_type, + u8 action_ste_selector) +{ + struct mlx5hws_matcher_attr *attr = &matcher->attr; + struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; + struct mlx5hws_match_template *mt = matcher->mt; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + struct mlx5hws_action_default_stc *default_stc; + struct mlx5hws_matcher_action_ste *action_ste; + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_pool *ste_pool, *stc_pool; + struct mlx5hws_pool_chunk *ste; + u32 *rtc_0_id, *rtc_1_id; + u32 obj_id; + int ret; + + switch (rtc_type) { + case HWS_MATCHER_RTC_TYPE_MATCH: + rtc_0_id = &matcher->match_ste.rtc_0_id; + rtc_1_id = &matcher->match_ste.rtc_1_id; + ste_pool = matcher->match_ste.pool; + ste = &matcher->match_ste.ste; + ste->order = attr->table.sz_col_log + attr->table.sz_row_log; + + rtc_attr.log_size = attr->table.sz_row_log; + rtc_attr.log_depth = attr->table.sz_col_log; + rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); + rtc_attr.is_scnd_range = 0; + rtc_attr.miss_ft_id = matcher->end_ft_id; + + if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { + /* The usual Hash Table */ + rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; + + /* The first mt is used since all share the same definer */ + rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); + } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) { + rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + rtc_attr.num_hash_definer = 1; + + if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { + /* Hash Split Table */ + rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; + rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); + } else if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { + /* Linear Lookup Table */ + rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR; + rtc_attr.match_definer_0 = ctx->caps->linear_match_definer; + } + } + + /* Match pool requires implicit allocation */ + ret = mlx5hws_pool_chunk_alloc(ste_pool, ste); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate STE for %s RTC", + hws_matcher_rtc_type_to_str(rtc_type)); + return ret; + } + break; + + case HWS_MATCHER_RTC_TYPE_STE_ARRAY: + action_ste = &matcher->action_ste[action_ste_selector]; + + rtc_0_id = &action_ste->rtc_0_id; + rtc_1_id = &action_ste->rtc_1_id; + ste_pool = action_ste->pool; + ste = &action_ste->ste; + ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) + + attr->table.sz_row_log; + rtc_attr.log_size = ste->order; + rtc_attr.log_depth = 0; + rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + /* The action STEs use the default always hit definer */ + rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; + rtc_attr.is_frst_jumbo = false; + rtc_attr.miss_ft_id = 0; + break; + + default: + mlx5hws_err(ctx, "HWS Invalid RTC type\n"); + return -EINVAL; + } + + obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + + rtc_attr.pd = ctx->pd_num; + rtc_attr.ste_base = obj_id; + rtc_attr.ste_offset = ste->offset; + rtc_attr.reparse_mode = 
mlx5hws_context_get_reparse_mode(ctx); + rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, false); + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false); + + /* STC is a single resource (obj_id), use any STC for the ID */ + stc_pool = ctx->stc_pool[tbl->type]; + default_stc = ctx->common_res[tbl->type].default_stc; + obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + rtc_attr.stc_base = obj_id; + + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); + if (ret) { + mlx5hws_err(ctx, "Failed to create matcher RTC of type %s", + hws_matcher_rtc_type_to_str(rtc_type)); + goto free_ste; + } + + if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { + obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + rtc_attr.ste_base = obj_id; + rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true); + + obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + rtc_attr.stc_base = obj_id; + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, true); + + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); + if (ret) { + mlx5hws_err(ctx, "Failed to create peer matcher RTC of type %s", + hws_matcher_rtc_type_to_str(rtc_type)); + goto destroy_rtc_0; + } + } + + return 0; + +destroy_rtc_0: + mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id); +free_ste: + if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) + mlx5hws_pool_chunk_free(ste_pool, ste); + return ret; +} + +static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher, + enum mlx5hws_matcher_rtc_type rtc_type, + u8 action_ste_selector) +{ + struct mlx5hws_matcher_action_ste *action_ste; + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_pool_chunk *ste; + struct mlx5hws_pool *ste_pool; + u32 rtc_0_id, rtc_1_id; + + switch (rtc_type) { + case HWS_MATCHER_RTC_TYPE_MATCH: + rtc_0_id = matcher->match_ste.rtc_0_id; + rtc_1_id = matcher->match_ste.rtc_1_id; + ste_pool = matcher->match_ste.pool; + ste = &matcher->match_ste.ste; + break; + case HWS_MATCHER_RTC_TYPE_STE_ARRAY: + action_ste = &matcher->action_ste[action_ste_selector]; + rtc_0_id = action_ste->rtc_0_id; + rtc_1_id = action_ste->rtc_1_id; + ste_pool = action_ste->pool; + ste = &action_ste->ste; + break; + default: + return; + } + + if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) + mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_1_id); + + mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_0_id); + if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) + mlx5hws_pool_chunk_free(ste_pool, ste); +} + +static int +hws_matcher_check_attr_sz(struct mlx5hws_cmd_query_caps *caps, + struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher_attr *attr = &matcher->attr; + + if (attr->table.sz_col_log > caps->rtc_log_depth_max) { + mlx5hws_err(matcher->tbl->ctx, "Matcher depth exceeds limit %d\n", + caps->rtc_log_depth_max); + return -EOPNOTSUPP; + } + + if (attr->table.sz_col_log + attr->table.sz_row_log > caps->ste_alloc_log_max) { + mlx5hws_err(matcher->tbl->ctx, "Total matcher size exceeds limit %d\n", + caps->ste_alloc_log_max); + return -EOPNOTSUPP; + } + + if (attr->table.sz_col_log + attr->table.sz_row_log < caps->ste_alloc_log_gran) { + mlx5hws_err(matcher->tbl->ctx, "Total matcher size below limit %d\n", + caps->ste_alloc_log_gran); + return -EOPNOTSUPP; + } + + return 0; +} + +static void hws_matcher_set_pool_attr(struct mlx5hws_pool_attr *attr, + struct mlx5hws_matcher *matcher) +{ + switch (matcher->attr.optimize_flow_src) { + case MLX5HWS_MATCHER_FLOW_SRC_VPORT: + 
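		/*
		 * FDB steering objects are created in pairs (an original and
		 * a mirror copy). When the caller promises that all traffic
		 * comes from a VPORT or from the WIRE, the copy that can
		 * never be hit is shrunk to nothing (see the log_size = 0
		 * optimization in hws_matcher_set_rtc_attr_sz() above); the
		 * pool opt_type set below applies the same optimization at
		 * the STE pool level.
		 */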
attr->opt_type = MLX5HWS_POOL_OPTIMIZE_ORIG; + break; + case MLX5HWS_MATCHER_FLOW_SRC_WIRE: + attr->opt_type = MLX5HWS_POOL_OPTIMIZE_MIRROR; + break; + default: + break; + } +} + +static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher, + struct mlx5hws_action_template *at) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + bool valid; + int ret; + + valid = mlx5hws_action_check_combo(ctx, at->action_type_arr, matcher->tbl->type); + if (!valid) { + mlx5hws_err(ctx, "Invalid combination in action template\n"); + return -EINVAL; + } + + /* Process action template to setters */ + ret = mlx5hws_action_template_process(at); + if (ret) { + mlx5hws_err(ctx, "Failed to process action template\n"); + return ret; + } + + return 0; +} + +static int hws_matcher_resize_init(struct mlx5hws_matcher *src_matcher) +{ + struct mlx5hws_matcher_resize_data *resize_data; + + resize_data = kzalloc(sizeof(*resize_data), GFP_KERNEL); + if (!resize_data) + return -ENOMEM; + + resize_data->max_stes = src_matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes; + + resize_data->action_ste[0].stc = src_matcher->action_ste[0].stc; + resize_data->action_ste[0].rtc_0_id = src_matcher->action_ste[0].rtc_0_id; + resize_data->action_ste[0].rtc_1_id = src_matcher->action_ste[0].rtc_1_id; + resize_data->action_ste[0].pool = src_matcher->action_ste[0].max_stes ? + src_matcher->action_ste[0].pool : + NULL; + resize_data->action_ste[1].stc = src_matcher->action_ste[1].stc; + resize_data->action_ste[1].rtc_0_id = src_matcher->action_ste[1].rtc_0_id; + resize_data->action_ste[1].rtc_1_id = src_matcher->action_ste[1].rtc_1_id; + resize_data->action_ste[1].pool = src_matcher->action_ste[1].max_stes ? + src_matcher->action_ste[1].pool : + NULL; + + /* Place the new resized matcher on the dst matcher's list */ + list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); + + /* Move all the previous resized matchers to the dst matcher's list */ + while (!list_empty(&src_matcher->resize_data)) { + resize_data = list_first_entry(&src_matcher->resize_data, + struct mlx5hws_matcher_resize_data, + list_node); + list_del_init(&resize_data->list_node); + list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); + } + + return 0; +} + +static void hws_matcher_resize_uninit(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher_resize_data *resize_data; + + if (!mlx5hws_matcher_is_resizable(matcher)) + return; + + while (!list_empty(&matcher->resize_data)) { + resize_data = list_first_entry(&matcher->resize_data, + struct mlx5hws_matcher_resize_data, + list_node); + list_del_init(&resize_data->list_node); + + if (resize_data->max_stes) { + mlx5hws_action_free_single_stc(matcher->tbl->ctx, + matcher->tbl->type, + &resize_data->action_ste[1].stc); + mlx5hws_action_free_single_stc(matcher->tbl->ctx, + matcher->tbl->type, + &resize_data->action_ste[0].stc); + + if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) { + mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, + resize_data->action_ste[1].rtc_1_id); + mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, + resize_data->action_ste[0].rtc_1_id); + } + mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, + resize_data->action_ste[1].rtc_0_id); + mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, + resize_data->action_ste[0].rtc_0_id); + if (resize_data->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].pool) { + mlx5hws_pool_destroy(resize_data->action_ste[1].pool); + mlx5hws_pool_destroy(resize_data->action_ste[0].pool); + } + } + + kfree(resize_data); + } 
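	/*
	 * Resize lifecycle (a sketch; the caller-side steps are
	 * illustrative):
	 *
	 *   mlx5hws_matcher_resize_set_target(src, dst);
	 *   ... move each rule with mlx5hws_matcher_resize_rule_move() ...
	 *   mlx5hws_matcher_destroy(src);
	 *
	 * hws_matcher_resize_init() parks the source matcher's action-STE
	 * RTCs, STCs and pools on the destination's resize_data list so
	 * that already-moved rules keep working; the loop above finally
	 * releases them when the destination matcher itself is torn down.
	 */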
+} + +static int +hws_matcher_bind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector) +{ + struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; + struct mlx5hws_matcher_action_ste *action_ste; + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_pool_attr pool_attr = {0}; + struct mlx5hws_context *ctx = tbl->ctx; + int ret; + + action_ste = &matcher->action_ste[action_ste_selector]; + + /* Allocate action STE mempool */ + pool_attr.table_type = tbl->type; + pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; + pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; + pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) + + matcher->attr.table.sz_row_log; + hws_matcher_set_pool_attr(&pool_attr, matcher); + action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!action_ste->pool) { + mlx5hws_err(ctx, "Failed to create action ste pool\n"); + return -EINVAL; + } + + /* Allocate action RTC */ + ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector); + if (ret) { + mlx5hws_err(ctx, "Failed to create action RTC\n"); + goto free_ste_pool; + } + + /* Allocate STC for jumps to STE */ + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; + stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + stc_attr.ste_table.ste = action_ste->ste; + stc_attr.ste_table.ste_pool = action_ste->pool; + stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; + + ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl->type, + &action_ste->stc); + if (ret) { + mlx5hws_err(ctx, "Failed to create action jump to table STC\n"); + goto free_rtc; + } + + return 0; + +free_rtc: + hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector); +free_ste_pool: + mlx5hws_pool_destroy(action_ste->pool); + return ret; +} + +static void hws_matcher_unbind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector) +{ + struct mlx5hws_matcher_action_ste *action_ste; + struct mlx5hws_table *tbl = matcher->tbl; + + action_ste = &matcher->action_ste[action_ste_selector]; + + if (!action_ste->max_stes || + matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION || + mlx5hws_matcher_is_in_resize(matcher)) + return; + + mlx5hws_action_free_single_stc(tbl->ctx, tbl->type, &action_ste->stc); + hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector); + mlx5hws_pool_destroy(action_ste->pool); +} + +static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) +{ + bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + u32 required_stes; + u8 max_stes = 0; + int i, ret; + + if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION) + return 0; + + for (i = 0; i < matcher->num_of_at; i++) { + struct mlx5hws_action_template *at = &matcher->at[i]; + + ret = hws_matcher_check_and_process_at(matcher, at); + if (ret) { + mlx5hws_err(ctx, "Invalid at %d", i); + return ret; + } + + required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term); + max_stes = max(max_stes, required_stes); + + /* Future: Optimize reparse */ + } + + /* There are no additional STEs required for matcher */ + if (!max_stes) + return 0; + + matcher->action_ste[0].max_stes = max_stes; + matcher->action_ste[1].max_stes = max_stes; + + ret = hws_matcher_bind_at_idx(matcher, 0); + if (ret) + return ret; + + ret = hws_matcher_bind_at_idx(matcher, 1); + if (ret) + goto 
free_at_0; + + return 0; + +free_at_0: + hws_matcher_unbind_at_idx(matcher, 0); + return ret; +} + +static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher) +{ + hws_matcher_unbind_at_idx(matcher, 1); + hws_matcher_unbind_at_idx(matcher, 0); +} + +static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + struct mlx5hws_pool_attr pool_attr = {0}; + int ret; + + /* Calculate match, range and hash definers */ + if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) { + ret = mlx5hws_definer_mt_init(ctx, matcher->mt); + if (ret) { + if (ret == E2BIG) + mlx5hws_err(ctx, "Failed to set matcher templates with match definers\n"); + return ret; + } + } + + /* Create an STE pool per matcher*/ + pool_attr.table_type = matcher->tbl->type; + pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; + pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL; + pool_attr.alloc_log_sz = matcher->attr.table.sz_col_log + + matcher->attr.table.sz_row_log; + hws_matcher_set_pool_attr(&pool_attr, matcher); + + matcher->match_ste.pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!matcher->match_ste.pool) { + mlx5hws_err(ctx, "Failed to allocate matcher STE pool\n"); + ret = -EOPNOTSUPP; + goto uninit_match_definer; + } + + return 0; + +uninit_match_definer: + if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) + mlx5hws_definer_mt_uninit(ctx, matcher->mt); + return ret; +} + +static void hws_matcher_unbind_mt(struct mlx5hws_matcher *matcher) +{ + mlx5hws_pool_destroy(matcher->match_ste.pool); + if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) + mlx5hws_definer_mt_uninit(matcher->tbl->ctx, matcher->mt); +} + +static int +hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, + struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher_attr *attr = &matcher->attr; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + + switch (attr->insert_mode) { + case MLX5HWS_MATCHER_INSERT_BY_HASH: + if (matcher->attr.distribute_mode != MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { + mlx5hws_err(ctx, "Invalid matcher distribute mode\n"); + return -EOPNOTSUPP; + } + break; + + case MLX5HWS_MATCHER_INSERT_BY_INDEX: + if (attr->table.sz_col_log) { + mlx5hws_err(ctx, "Matcher with INSERT_BY_INDEX supports only Nx1 table size\n"); + return -EOPNOTSUPP; + } + + if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { + /* Hash Split Table */ + if (!caps->rtc_hash_split_table) { + mlx5hws_err(ctx, "FW doesn't support insert by index and hash distribute\n"); + return -EOPNOTSUPP; + } + } else if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { + /* Linear Lookup Table */ + if (!caps->rtc_linear_lookup_table || + !IS_BIT_SET(caps->access_index_mode, + MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR)) { + mlx5hws_err(ctx, "FW doesn't support insert by index and linear distribute\n"); + return -EOPNOTSUPP; + } + + if (attr->table.sz_row_log > MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX) { + mlx5hws_err(ctx, "Matcher with linear distribute: rows exceed limit %d", + MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX); + return -EOPNOTSUPP; + } + } else { + mlx5hws_err(ctx, "Matcher has unsupported distribute mode\n"); + return -EOPNOTSUPP; + } + break; + + default: + mlx5hws_err(ctx, "Matcher has unsupported insert mode\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, + struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher_attr *attr = &matcher->attr; + + if 
(hws_matcher_validate_insert_mode(caps, matcher)) + return -EOPNOTSUPP; + + if (matcher->tbl->type != MLX5HWS_TABLE_TYPE_FDB && attr->optimize_flow_src) { + mlx5hws_err(matcher->tbl->ctx, "NIC domain doesn't support flow_src\n"); + return -EOPNOTSUPP; + } + + /* Convert number of rules to the required depth */ + if (attr->mode == MLX5HWS_MATCHER_RESOURCE_MODE_RULE && + attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) + attr->table.sz_col_log = hws_matcher_rules_to_tbl_depth(attr->rule.num_log); + + matcher->flags |= attr->resizable ? MLX5HWS_MATCHER_FLAGS_RESIZABLE : 0; + + return hws_matcher_check_attr_sz(caps, matcher); +} + +static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher) +{ + int ret; + + /* Select and create the definers for current matcher */ + ret = hws_matcher_bind_mt(matcher); + if (ret) + return ret; + + /* Calculate and verify action combination */ + ret = hws_matcher_bind_at(matcher); + if (ret) + goto unbind_mt; + + /* Create matcher end flow table anchor */ + ret = hws_matcher_create_end_ft(matcher); + if (ret) + goto unbind_at; + + /* Allocate the RTC for the new matcher */ + ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0); + if (ret) + goto destroy_end_ft; + + /* Connect the matcher to the matcher list */ + ret = hws_matcher_connect(matcher); + if (ret) + goto destroy_rtc; + + return 0; + +destroy_rtc: + hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0); +destroy_end_ft: + hws_matcher_destroy_end_ft(matcher); +unbind_at: + hws_matcher_unbind_at(matcher); +unbind_mt: + hws_matcher_unbind_mt(matcher); + return ret; +} + +static void hws_matcher_destroy_and_disconnect(struct mlx5hws_matcher *matcher) +{ + hws_matcher_resize_uninit(matcher); + hws_matcher_disconnect(matcher); + hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0); + hws_matcher_destroy_end_ft(matcher); + hws_matcher_unbind_at(matcher); + hws_matcher_unbind_mt(matcher); +} + +static int +hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + struct mlx5hws_matcher *col_matcher; + int ret; + + if (matcher->attr.mode != MLX5HWS_MATCHER_RESOURCE_MODE_RULE || + matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) + return 0; + + if (!hws_matcher_requires_col_tbl(matcher->attr.rule.num_log)) + return 0; + + col_matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); + if (!col_matcher) + return -ENOMEM; + + INIT_LIST_HEAD(&col_matcher->resize_data); + + col_matcher->tbl = matcher->tbl; + col_matcher->mt = matcher->mt; + col_matcher->at = matcher->at; + col_matcher->num_of_at = matcher->num_of_at; + col_matcher->num_of_mt = matcher->num_of_mt; + col_matcher->attr.priority = matcher->attr.priority; + col_matcher->flags = matcher->flags; + col_matcher->flags |= MLX5HWS_MATCHER_FLAGS_COLLISION; + col_matcher->attr.mode = MLX5HWS_MATCHER_RESOURCE_MODE_HTABLE; + col_matcher->attr.optimize_flow_src = matcher->attr.optimize_flow_src; + col_matcher->attr.table.sz_row_log = matcher->attr.rule.num_log; + col_matcher->attr.table.sz_col_log = MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH; + if (col_matcher->attr.table.sz_row_log > MLX5HWS_MATCHER_ASSURED_ROW_RATIO) + col_matcher->attr.table.sz_row_log -= MLX5HWS_MATCHER_ASSURED_ROW_RATIO; + + col_matcher->attr.max_num_of_at_attach = matcher->attr.max_num_of_at_attach; + + ret = hws_matcher_process_attr(ctx->caps, col_matcher); + if (ret) + goto free_col_matcher; + + ret = hws_matcher_create_and_connect(col_matcher); + if (ret) + goto 
free_col_matcher; + + matcher->col_matcher = col_matcher; + + return 0; + +free_col_matcher: + kfree(col_matcher); + mlx5hws_err(ctx, "Failed to create assured collision matcher\n"); + return ret; +} + +static void +hws_matcher_destroy_col_matcher(struct mlx5hws_matcher *matcher) +{ + if (matcher->attr.mode != MLX5HWS_MATCHER_RESOURCE_MODE_RULE || + matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) + return; + + if (matcher->col_matcher) { + hws_matcher_destroy_and_disconnect(matcher->col_matcher); + kfree(matcher->col_matcher); + } +} + +static int hws_matcher_init(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + int ret; + + INIT_LIST_HEAD(&matcher->resize_data); + + mutex_lock(&ctx->ctrl_lock); + + /* Allocate matcher resource and connect to the packet pipe */ + ret = hws_matcher_create_and_connect(matcher); + if (ret) + goto unlock_err; + + /* Create additional matcher for collision handling */ + ret = hws_matcher_create_col_matcher(matcher); + if (ret) + goto destory_and_disconnect; + mutex_unlock(&ctx->ctrl_lock); + + return 0; + +destory_and_disconnect: + hws_matcher_destroy_and_disconnect(matcher); +unlock_err: + mutex_unlock(&ctx->ctrl_lock); + return ret; +} + +static int hws_matcher_uninit(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + + mutex_lock(&ctx->ctrl_lock); + hws_matcher_destroy_col_matcher(matcher); + hws_matcher_destroy_and_disconnect(matcher); + mutex_unlock(&ctx->ctrl_lock); + + return 0; +} + +int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, + struct mlx5hws_action_template *at) +{ + bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u32 required_stes; + int ret; + + if (!matcher->attr.max_num_of_at_attach) { + mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n", + matcher->num_of_at); + return -EOPNOTSUPP; + } + + ret = hws_matcher_check_and_process_at(matcher, at); + if (ret) + return ret; + + required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term); + if (matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes < required_stes) { + mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n", + required_stes, + matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes); + return -ENOMEM; + } + + matcher->at[matcher->num_of_at] = *at; + matcher->num_of_at += 1; + matcher->attr.max_num_of_at_attach -= 1; + + if (matcher->col_matcher) + matcher->col_matcher->num_of_at = matcher->num_of_at; + + return 0; +} + +static int +hws_matcher_set_templates(struct mlx5hws_matcher *matcher, + struct mlx5hws_match_template *mt[], + u8 num_of_mt, + struct mlx5hws_action_template *at[], + u8 num_of_at) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + int ret = 0; + int i; + + if (!num_of_mt || !num_of_at) { + mlx5hws_err(ctx, "Number of action/match template cannot be zero\n"); + return -EOPNOTSUPP; + } + + matcher->mt = kcalloc(num_of_mt, sizeof(*matcher->mt), GFP_KERNEL); + if (!matcher->mt) + return -ENOMEM; + + matcher->at = kcalloc(num_of_at + matcher->attr.max_num_of_at_attach, + sizeof(*matcher->at), + GFP_KERNEL); + if (!matcher->at) { + mlx5hws_err(ctx, "Failed to allocate action template array\n"); + ret = -ENOMEM; + goto free_mt; + } + + for (i = 0; i < num_of_mt; i++) + matcher->mt[i] = *mt[i]; + + for (i = 0; i < num_of_at; i++) + matcher->at[i] = *at[i]; + + matcher->num_of_mt = num_of_mt; + matcher->num_of_at = num_of_at; + + return 0; + +free_mt: + 
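	/* Only matcher->mt was allocated at this point; allocating
	 * matcher->at is what failed, so just release the mt array.
	 */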
kfree(matcher->mt); + return ret; +} + +static void +hws_matcher_unset_templates(struct mlx5hws_matcher *matcher) +{ + kfree(matcher->at); + kfree(matcher->mt); +} + +struct mlx5hws_matcher * +mlx5hws_matcher_create(struct mlx5hws_table *tbl, + struct mlx5hws_match_template *mt[], + u8 num_of_mt, + struct mlx5hws_action_template *at[], + u8 num_of_at, + struct mlx5hws_matcher_attr *attr) +{ + struct mlx5hws_context *ctx = tbl->ctx; + struct mlx5hws_matcher *matcher; + int ret; + + matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); + if (!matcher) + return NULL; + + matcher->tbl = tbl; + matcher->attr = *attr; + + ret = hws_matcher_process_attr(tbl->ctx->caps, matcher); + if (ret) + goto free_matcher; + + ret = hws_matcher_set_templates(matcher, mt, num_of_mt, at, num_of_at); + if (ret) + goto free_matcher; + + ret = hws_matcher_init(matcher); + if (ret) { + mlx5hws_err(ctx, "Failed to initialise matcher: %d\n", ret); + goto unset_templates; + } + + return matcher; + +unset_templates: + hws_matcher_unset_templates(matcher); +free_matcher: + kfree(matcher); + return NULL; +} + +int mlx5hws_matcher_destroy(struct mlx5hws_matcher *matcher) +{ + hws_matcher_uninit(matcher); + hws_matcher_unset_templates(matcher); + kfree(matcher); + return 0; +} + +struct mlx5hws_match_template * +mlx5hws_match_template_create(struct mlx5hws_context *ctx, + u32 *match_param, + u32 match_param_sz, + u8 match_criteria_enable) +{ + struct mlx5hws_match_template *mt; + + mt = kzalloc(sizeof(*mt), GFP_KERNEL); + if (!mt) + return NULL; + + mt->match_param = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!mt->match_param) + goto free_template; + + memcpy(mt->match_param, match_param, match_param_sz); + mt->match_criteria_enable = match_criteria_enable; + + return mt; + +free_template: + kfree(mt); + return NULL; +} + +int mlx5hws_match_template_destroy(struct mlx5hws_match_template *mt) +{ + kfree(mt->match_param); + kfree(mt); + return 0; +} + +static int hws_matcher_resize_precheck(struct mlx5hws_matcher *src_matcher, + struct mlx5hws_matcher *dst_matcher) +{ + struct mlx5hws_context *ctx = src_matcher->tbl->ctx; + int i; + + if (src_matcher->tbl->type != dst_matcher->tbl->type) { + mlx5hws_err(ctx, "Table type mismatch for src/dst matchers\n"); + return -EINVAL; + } + + if (!mlx5hws_matcher_is_resizable(src_matcher) || + !mlx5hws_matcher_is_resizable(dst_matcher)) { + mlx5hws_err(ctx, "Src/dst matcher is not resizable\n"); + return -EINVAL; + } + + if (mlx5hws_matcher_is_insert_by_idx(src_matcher) != + mlx5hws_matcher_is_insert_by_idx(dst_matcher)) { + mlx5hws_err(ctx, "Src/dst matchers insert mode mismatch\n"); + return -EINVAL; + } + + if (mlx5hws_matcher_is_in_resize(src_matcher) || + mlx5hws_matcher_is_in_resize(dst_matcher)) { + mlx5hws_err(ctx, "Src/dst matcher is already in resize\n"); + return -EINVAL; + } + + /* Compare match templates - make sure the definers are equivalent */ + if (src_matcher->num_of_mt != dst_matcher->num_of_mt) { + mlx5hws_err(ctx, "Src/dst matcher match templates mismatch\n"); + return -EINVAL; + } + + if (src_matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes > + dst_matcher->action_ste[0].max_stes) { + mlx5hws_err(ctx, "Src/dst matcher max STEs mismatch\n"); + return -EINVAL; + } + + for (i = 0; i < src_matcher->num_of_mt; i++) { + if (mlx5hws_definer_compare(src_matcher->mt[i].definer, + dst_matcher->mt[i].definer)) { + mlx5hws_err(ctx, "Src/dst matcher definers mismatch\n"); + return -EINVAL; + } + } + + return 0; +} + +int 
mlx5hws_matcher_resize_set_target(struct mlx5hws_matcher *src_matcher, + struct mlx5hws_matcher *dst_matcher) +{ + int ret = 0; + + mutex_lock(&src_matcher->tbl->ctx->ctrl_lock); + + ret = hws_matcher_resize_precheck(src_matcher, dst_matcher); + if (ret) + goto out; + + src_matcher->resize_dst = dst_matcher; + + ret = hws_matcher_resize_init(src_matcher); + if (ret) + src_matcher->resize_dst = NULL; + +out: + mutex_unlock(&src_matcher->tbl->ctx->ctrl_lock); + return ret; +} + +int mlx5hws_matcher_resize_rule_move(struct mlx5hws_matcher *src_matcher, + struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + struct mlx5hws_context *ctx = src_matcher->tbl->ctx; + + if (unlikely(!mlx5hws_matcher_is_in_resize(src_matcher))) { + mlx5hws_err(ctx, "Matcher is not resizable or not in resize\n"); + return -EINVAL; + } + + if (unlikely(src_matcher != rule->matcher)) { + mlx5hws_err(ctx, "Rule doesn't belong to src matcher\n"); + return -EINVAL; + } + + return mlx5hws_rule_move_hws_add(rule, attr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.h new file mode 100644 index 000000000000..125391d1a114 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_MATCHER_H_ +#define MLX5HWS_MATCHER_H_ + +/* We calculated that concatenating a collision table to the main table with + * 3% of the main table rows will be enough resources for high insertion + * success probability. + * + * The calculation: log2(2^x * 3 / 100) = log2(2^x) + log2(3/100) = x - 5.05 ~ 5 + */ +#define MLX5HWS_MATCHER_ASSURED_ROW_RATIO 5 +/* Threshold to determine if amount of rules require a collision table */ +#define MLX5HWS_MATCHER_ASSURED_RULES_TH 10 +/* Required depth of an assured collision table */ +#define MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH 4 +/* Required depth of the main large table */ +#define MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH 2 + +enum mlx5hws_matcher_offset { + MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12, + MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13, +}; + +enum mlx5hws_matcher_flags { + MLX5HWS_MATCHER_FLAGS_COLLISION = 1 << 2, + MLX5HWS_MATCHER_FLAGS_RESIZABLE = 1 << 3, +}; + +struct mlx5hws_match_template { + struct mlx5hws_definer *definer; + struct mlx5hws_definer_fc *fc; + u32 *match_param; + u8 match_criteria_enable; + u16 fc_sz; +}; + +struct mlx5hws_matcher_match_ste { + struct mlx5hws_pool_chunk ste; + u32 rtc_0_id; + u32 rtc_1_id; + struct mlx5hws_pool *pool; +}; + +struct mlx5hws_matcher_action_ste { + struct mlx5hws_pool_chunk ste; + struct mlx5hws_pool_chunk stc; + u32 rtc_0_id; + u32 rtc_1_id; + struct mlx5hws_pool *pool; + u8 max_stes; +}; + +struct mlx5hws_matcher_resize_data_node { + struct mlx5hws_pool_chunk stc; + u32 rtc_0_id; + u32 rtc_1_id; + struct mlx5hws_pool *pool; +}; + +struct mlx5hws_matcher_resize_data { + struct mlx5hws_matcher_resize_data_node action_ste[2]; + u8 max_stes; + struct list_head list_node; +}; + +struct mlx5hws_matcher { + struct mlx5hws_table *tbl; + struct mlx5hws_matcher_attr attr; + struct mlx5hws_match_template *mt; + struct mlx5hws_action_template *at; + u8 num_of_at; + u8 num_of_mt; + /* enum mlx5hws_matcher_flags */ + u8 flags; + u32 end_ft_id; + struct mlx5hws_matcher *col_matcher; + struct mlx5hws_matcher *resize_dst; + struct mlx5hws_matcher_match_ste match_ste; + struct 
mlx5hws_matcher_action_ste action_ste[2]; + struct list_head list_node; + struct list_head resize_data; +}; + +static inline bool +mlx5hws_matcher_mt_is_jumbo(struct mlx5hws_match_template *mt) +{ + return mlx5hws_definer_is_jumbo(mt->definer); +} + +static inline bool mlx5hws_matcher_is_resizable(struct mlx5hws_matcher *matcher) +{ + return !!(matcher->flags & MLX5HWS_MATCHER_FLAGS_RESIZABLE); +} + +static inline bool mlx5hws_matcher_is_in_resize(struct mlx5hws_matcher *matcher) +{ + return !!matcher->resize_dst; +} + +static inline bool mlx5hws_matcher_is_insert_by_idx(struct mlx5hws_matcher *matcher) +{ + return matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX; +} + +#endif /* MLX5HWS_MATCHER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pat_arg.c new file mode 100644 index 000000000000..e084a5cbf81f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pat_arg.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" + +enum mlx5hws_arg_chunk_size +mlx5hws_arg_data_size_to_arg_log_size(u16 data_size) +{ + /* Return the roundup of log2(data_size) */ + if (data_size <= MLX5HWS_ARG_DATA_SIZE) + return MLX5HWS_ARG_CHUNK_SIZE_1; + if (data_size <= MLX5HWS_ARG_DATA_SIZE * 2) + return MLX5HWS_ARG_CHUNK_SIZE_2; + if (data_size <= MLX5HWS_ARG_DATA_SIZE * 4) + return MLX5HWS_ARG_CHUNK_SIZE_3; + if (data_size <= MLX5HWS_ARG_DATA_SIZE * 8) + return MLX5HWS_ARG_CHUNK_SIZE_4; + + return MLX5HWS_ARG_CHUNK_SIZE_MAX; +} + +u32 mlx5hws_arg_data_size_to_arg_size(u16 data_size) +{ + return BIT(mlx5hws_arg_data_size_to_arg_log_size(data_size)); +} + +enum mlx5hws_arg_chunk_size +mlx5hws_arg_get_arg_log_size(u16 num_of_actions) +{ + return mlx5hws_arg_data_size_to_arg_log_size(num_of_actions * + MLX5HWS_MODIFY_ACTION_SIZE); +} + +u32 mlx5hws_arg_get_arg_size(u16 num_of_actions) +{ + return BIT(mlx5hws_arg_get_arg_log_size(num_of_actions)); +} + +bool mlx5hws_pat_require_reparse(__be64 *actions, u16 num_of_actions) +{ + u16 i, field; + u8 action_id; + + for (i = 0; i < num_of_actions; i++) { + action_id = MLX5_GET(set_action_in, &actions[i], action_type); + + switch (action_id) { + case MLX5_MODIFICATION_TYPE_NOP: + field = MLX5_MODI_OUT_NONE; + break; + + case MLX5_MODIFICATION_TYPE_SET: + case MLX5_MODIFICATION_TYPE_ADD: + field = MLX5_GET(set_action_in, &actions[i], field); + break; + + case MLX5_MODIFICATION_TYPE_COPY: + case MLX5_MODIFICATION_TYPE_ADD_FIELD: + field = MLX5_GET(copy_action_in, &actions[i], dst_field); + break; + + default: + /* Insert/Remove/Unknown actions require reparse */ + return true; + } + + /* Below fields can change packet structure require a reparse */ + if (field == MLX5_MODI_OUT_ETHERTYPE || + field == MLX5_MODI_OUT_IPV6_NEXT_HDR) + return true; + } + + return false; +} + +/* Cache and cache element handling */ +int mlx5hws_pat_init_pattern_cache(struct mlx5hws_pattern_cache **cache) +{ + struct mlx5hws_pattern_cache *new_cache; + + new_cache = kzalloc(sizeof(*new_cache), GFP_KERNEL); + if (!new_cache) + return -ENOMEM; + + INIT_LIST_HEAD(&new_cache->ptrn_list); + mutex_init(&new_cache->lock); + + *cache = new_cache; + + return 0; +} + +void mlx5hws_pat_uninit_pattern_cache(struct mlx5hws_pattern_cache *cache) +{ + mutex_destroy(&cache->lock); + kfree(cache); +} + +static bool mlx5hws_pat_compare_pattern(int cur_num_of_actions, 
+ __be64 cur_actions[], + int num_of_actions, + __be64 actions[]) +{ + int i; + + if (cur_num_of_actions != num_of_actions) + return false; + + for (i = 0; i < num_of_actions; i++) { + u8 action_id = + MLX5_GET(set_action_in, &actions[i], action_type); + + if (action_id == MLX5_MODIFICATION_TYPE_COPY || + action_id == MLX5_MODIFICATION_TYPE_ADD_FIELD) { + if (actions[i] != cur_actions[i]) + return false; + } else { + /* Compare just the control, not the values */ + if ((__force __be32)actions[i] != + (__force __be32)cur_actions[i]) + return false; + } + } + + return true; +} + +static struct mlx5hws_pattern_cache_item * +mlx5hws_pat_find_cached_pattern(struct mlx5hws_pattern_cache *cache, + u16 num_of_actions, + __be64 *actions) +{ + struct mlx5hws_pattern_cache_item *cached_pat = NULL; + + list_for_each_entry(cached_pat, &cache->ptrn_list, ptrn_list_node) { + if (mlx5hws_pat_compare_pattern(cached_pat->mh_data.num_of_actions, + (__be64 *)cached_pat->mh_data.data, + num_of_actions, + actions)) + return cached_pat; + } + + return NULL; +} + +static struct mlx5hws_pattern_cache_item * +mlx5hws_pat_get_existing_cached_pattern(struct mlx5hws_pattern_cache *cache, + u16 num_of_actions, + __be64 *actions) +{ + struct mlx5hws_pattern_cache_item *cached_pattern; + + cached_pattern = mlx5hws_pat_find_cached_pattern(cache, num_of_actions, actions); + if (cached_pattern) { + /* LRU: move it to be first in the list */ + list_del_init(&cached_pattern->ptrn_list_node); + list_add(&cached_pattern->ptrn_list_node, &cache->ptrn_list); + cached_pattern->refcount++; + } + + return cached_pattern; +} + +static struct mlx5hws_pattern_cache_item * +mlx5hws_pat_add_pattern_to_cache(struct mlx5hws_pattern_cache *cache, + u32 pattern_id, + u16 num_of_actions, + __be64 *actions) +{ + struct mlx5hws_pattern_cache_item *cached_pattern; + + cached_pattern = kzalloc(sizeof(*cached_pattern), GFP_KERNEL); + if (!cached_pattern) + return NULL; + + cached_pattern->mh_data.num_of_actions = num_of_actions; + cached_pattern->mh_data.pattern_id = pattern_id; + cached_pattern->mh_data.data = + kmemdup(actions, num_of_actions * MLX5HWS_MODIFY_ACTION_SIZE, GFP_KERNEL); + if (!cached_pattern->mh_data.data) + goto free_cached_obj; + + list_add(&cached_pattern->ptrn_list_node, &cache->ptrn_list); + cached_pattern->refcount = 1; + + return cached_pattern; + +free_cached_obj: + kfree(cached_pattern); + return NULL; +} + +static struct mlx5hws_pattern_cache_item * +mlx5hws_pat_find_cached_pattern_by_id(struct mlx5hws_pattern_cache *cache, + u32 ptrn_id) +{ + struct mlx5hws_pattern_cache_item *cached_pattern = NULL; + + list_for_each_entry(cached_pattern, &cache->ptrn_list, ptrn_list_node) { + if (cached_pattern->mh_data.pattern_id == ptrn_id) + return cached_pattern; + } + + return NULL; +} + +static void +mlx5hws_pat_remove_pattern(struct mlx5hws_pattern_cache_item *cached_pattern) +{ + list_del_init(&cached_pattern->ptrn_list_node); + + kfree(cached_pattern->mh_data.data); + kfree(cached_pattern); +} + +void mlx5hws_pat_put_pattern(struct mlx5hws_context *ctx, u32 ptrn_id) +{ + struct mlx5hws_pattern_cache *cache = ctx->pattern_cache; + struct mlx5hws_pattern_cache_item *cached_pattern; + + mutex_lock(&cache->lock); + cached_pattern = mlx5hws_pat_find_cached_pattern_by_id(cache, ptrn_id); + if (!cached_pattern) { + mlx5hws_err(ctx, "Failed to find cached pattern with provided ID\n"); + pr_warn("HWS: pattern ID %d is not found\n", ptrn_id); + goto out; + } + + if (--cached_pattern->refcount) + goto out; + + 
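	/*
	 * Last reference dropped: unlink the cache entry and destroy the
	 * FW pattern object. A typical lifecycle (sketch):
	 *
	 *   mlx5hws_pat_get_pattern(ctx, pat, sz, &id);  miss: FW object created, refcount = 1
	 *   mlx5hws_pat_get_pattern(ctx, pat, sz, &id);  hit: refcount = 2
	 *   mlx5hws_pat_put_pattern(ctx, id);            refcount = 1
	 *   mlx5hws_pat_put_pattern(ctx, id);            refcount = 0, destroyed here
	 */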
mlx5hws_pat_remove_pattern(cached_pattern); + mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, ptrn_id); + +out: + mutex_unlock(&cache->lock); +} + +int mlx5hws_pat_get_pattern(struct mlx5hws_context *ctx, + __be64 *pattern, size_t pattern_sz, + u32 *pattern_id) +{ + u16 num_of_actions = pattern_sz / MLX5HWS_MODIFY_ACTION_SIZE; + struct mlx5hws_pattern_cache_item *cached_pattern; + u32 ptrn_id = 0; + int ret = 0; + + mutex_lock(&ctx->pattern_cache->lock); + + cached_pattern = mlx5hws_pat_get_existing_cached_pattern(ctx->pattern_cache, + num_of_actions, + pattern); + if (cached_pattern) { + *pattern_id = cached_pattern->mh_data.pattern_id; + goto out_unlock; + } + + ret = mlx5hws_cmd_header_modify_pattern_create(ctx->mdev, + pattern_sz, + (u8 *)pattern, + &ptrn_id); + if (ret) { + mlx5hws_err(ctx, "Failed to create pattern FW object\n"); + goto out_unlock; + } + + cached_pattern = mlx5hws_pat_add_pattern_to_cache(ctx->pattern_cache, + ptrn_id, + num_of_actions, + pattern); + if (!cached_pattern) { + mlx5hws_err(ctx, "Failed to add pattern to cache\n"); + ret = -EINVAL; + goto clean_pattern; + } + + mutex_unlock(&ctx->pattern_cache->lock); + *pattern_id = ptrn_id; + + return ret; + +clean_pattern: + mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, *pattern_id); +out_unlock: + mutex_unlock(&ctx->pattern_cache->lock); + return ret; +} + +static void +mlx5d_arg_init_send_attr(struct mlx5hws_send_engine_post_attr *send_attr, + void *comp_data, + u32 arg_idx) +{ + send_attr->opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS; + send_attr->opmod = MLX5HWS_WQE_GTA_OPMOD_MOD_ARG; + send_attr->len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA; + send_attr->id = arg_idx; + send_attr->user_data = comp_data; +} + +void mlx5hws_arg_decapl3_write(struct mlx5hws_send_engine *queue, + u32 arg_idx, + u8 *arg_data, + u16 num_of_actions) +{ + struct mlx5hws_send_engine_post_attr send_attr = {0}; + struct mlx5hws_wqe_gta_data_seg_arg *wqe_arg = NULL; + struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl = NULL; + struct mlx5hws_send_engine_post_ctrl ctrl; + size_t wqe_len; + + mlx5d_arg_init_send_attr(&send_attr, NULL, arg_idx); + + ctrl = mlx5hws_send_engine_post_start(queue); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len); + memset(wqe_ctrl, 0, wqe_len); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_arg, &wqe_len); + mlx5hws_action_prepare_decap_l3_data(arg_data, (u8 *)wqe_arg, + num_of_actions); + mlx5hws_send_engine_post_end(&ctrl, &send_attr); +} + +void mlx5hws_arg_write(struct mlx5hws_send_engine *queue, + void *comp_data, + u32 arg_idx, + u8 *arg_data, + size_t data_size) +{ + struct mlx5hws_send_engine_post_attr send_attr = {0}; + struct mlx5hws_wqe_gta_data_seg_arg *wqe_arg; + struct mlx5hws_send_engine_post_ctrl ctrl; + struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl; + int i, full_iter, leftover; + size_t wqe_len; + + mlx5d_arg_init_send_attr(&send_attr, comp_data, arg_idx); + + /* Each WQE can hold 64B of data, it might require multiple iteration */ + full_iter = data_size / MLX5HWS_ARG_DATA_SIZE; + leftover = data_size & (MLX5HWS_ARG_DATA_SIZE - 1); + + for (i = 0; i < full_iter; i++) { + ctrl = mlx5hws_send_engine_post_start(queue); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len); + memset(wqe_ctrl, 0, wqe_len); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_arg, &wqe_len); + memcpy(wqe_arg, arg_data, wqe_len); + send_attr.id = arg_idx++; + mlx5hws_send_engine_post_end(&ctrl, &send_attr); + + /* Move to next argument data */ + arg_data += 
MLX5HWS_ARG_DATA_SIZE; + } + + if (leftover) { + ctrl = mlx5hws_send_engine_post_start(queue); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len); + memset(wqe_ctrl, 0, wqe_len); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_arg, &wqe_len); + memcpy(wqe_arg, arg_data, leftover); + send_attr.id = arg_idx; + mlx5hws_send_engine_post_end(&ctrl, &send_attr); + } +} + +int mlx5hws_arg_write_inline_arg_data(struct mlx5hws_context *ctx, + u32 arg_idx, + u8 *arg_data, + size_t data_size) +{ + struct mlx5hws_send_engine *queue; + int ret; + + mutex_lock(&ctx->ctrl_lock); + + /* Get the control queue */ + queue = &ctx->send_queue[ctx->queues - 1]; + + mlx5hws_arg_write(queue, arg_data, arg_idx, arg_data, data_size); + + mlx5hws_send_engine_flush_queue(queue); + + /* Poll for completion */ + ret = mlx5hws_send_queue_action(ctx, ctx->queues - 1, + MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC); + + if (ret) + mlx5hws_err(ctx, "Failed to drain arg queue\n"); + + mutex_unlock(&ctx->ctrl_lock); + + return ret; +} + +bool mlx5hws_arg_is_valid_arg_request_size(struct mlx5hws_context *ctx, + u32 arg_size) +{ + if (arg_size < ctx->caps->log_header_modify_argument_granularity || + arg_size > ctx->caps->log_header_modify_argument_max_alloc) { + return false; + } + return true; +} + +int mlx5hws_arg_create(struct mlx5hws_context *ctx, + u8 *data, + size_t data_sz, + u32 log_bulk_sz, + bool write_data, + u32 *arg_id) +{ + u16 single_arg_log_sz; + u16 multi_arg_log_sz; + int ret; + u32 id; + + single_arg_log_sz = mlx5hws_arg_data_size_to_arg_log_size(data_sz); + multi_arg_log_sz = single_arg_log_sz + log_bulk_sz; + + if (single_arg_log_sz >= MLX5HWS_ARG_CHUNK_SIZE_MAX) { + mlx5hws_err(ctx, "Requested single arg %u not supported\n", single_arg_log_sz); + return -EOPNOTSUPP; + } + + if (!mlx5hws_arg_is_valid_arg_request_size(ctx, multi_arg_log_sz)) { + mlx5hws_err(ctx, "Argument log size %d not supported by FW\n", multi_arg_log_sz); + return -EOPNOTSUPP; + } + + /* Alloc bulk of args */ + ret = mlx5hws_cmd_arg_create(ctx->mdev, multi_arg_log_sz, ctx->pd_num, &id); + if (ret) { + mlx5hws_err(ctx, "Failed allocating arg in order: %d\n", multi_arg_log_sz); + return ret; + } + + if (write_data) { + ret = mlx5hws_arg_write_inline_arg_data(ctx, id, + data, data_sz); + if (ret) { + mlx5hws_err(ctx, "Failed writing arg data\n"); + mlx5hws_cmd_arg_destroy(ctx->mdev, id); + return ret; + } + } + + *arg_id = id; + return ret; +} + +void mlx5hws_arg_destroy(struct mlx5hws_context *ctx, u32 arg_id) +{ + mlx5hws_cmd_arg_destroy(ctx->mdev, arg_id); +} + +int mlx5hws_arg_create_modify_header_arg(struct mlx5hws_context *ctx, + __be64 *data, + u8 num_of_actions, + u32 log_bulk_sz, + bool write_data, + u32 *arg_id) +{ + size_t data_sz = num_of_actions * MLX5HWS_MODIFY_ACTION_SIZE; + int ret; + + ret = mlx5hws_arg_create(ctx, + (u8 *)data, + data_sz, + log_bulk_sz, + write_data, + arg_id); + if (ret) + mlx5hws_err(ctx, "Failed creating modify header arg\n"); + + return ret; +} + +static int +hws_action_modify_check_field_limitation(u8 action_type, __be64 *pattern) +{ + /* Need to check field limitation here, but for now - return OK */ + return 0; +} + +#define INVALID_FIELD 0xffff + +static void +hws_action_modify_get_target_fields(u8 action_type, __be64 *pattern, + u16 *src_field, u16 *dst_field) +{ + switch (action_type) { + case MLX5_ACTION_TYPE_SET: + case MLX5_ACTION_TYPE_ADD: + *src_field = MLX5_GET(set_action_in, pattern, field); + *dst_field = INVALID_FIELD; + break; + case MLX5_ACTION_TYPE_COPY: + 
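		/*
		 * Copy actions carry both a source and a destination field;
		 * mlx5hws_pat_calc_nope() below compares these across
		 * adjacent actions to decide where a NOP must be inserted
		 * between two actions that touch the same field.
		 */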
+		*src_field = MLX5_GET(copy_action_in, pattern, src_field);
+		*dst_field = MLX5_GET(copy_action_in, pattern, dst_field);
+		break;
+	default:
+		pr_warn("HWS: invalid modify header action type %d\n", action_type);
+	}
+}
+
+bool mlx5hws_pat_verify_actions(struct mlx5hws_context *ctx, __be64 pattern[], size_t sz)
+{
+	size_t i;
+
+	for (i = 0; i < sz / MLX5HWS_MODIFY_ACTION_SIZE; i++) {
+		u8 action_type =
+			MLX5_GET(set_action_in, &pattern[i], action_type);
+		if (action_type >= MLX5_MODIFICATION_TYPE_MAX) {
+			mlx5hws_err(ctx, "Unsupported action id %d\n", action_type);
+			return false;
+		}
+		if (hws_action_modify_check_field_limitation(action_type, &pattern[i])) {
+			mlx5hws_err(ctx, "Unsupported action number %zu\n", i);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions,
+			   size_t max_actions, size_t *new_size,
+			   u32 *nope_location, __be64 *new_pat)
+{
+	u16 prev_src_field = 0, prev_dst_field = 0;
+	u16 src_field, dst_field;
+	u8 action_type;
+	size_t i, j;
+
+	*new_size = num_actions;
+	*nope_location = 0;
+
+	if (num_actions == 1)
+		return;
+
+	for (i = 0, j = 0; i < num_actions; i++, j++) {
+		action_type = MLX5_GET(set_action_in, &pattern[i], action_type);
+
+		hws_action_modify_get_target_fields(action_type, &pattern[i],
+						    &src_field, &dst_field);
+		if (i % 2) {
+			if (action_type == MLX5_ACTION_TYPE_COPY &&
+			    (prev_src_field == src_field ||
+			     prev_dst_field == dst_field)) {
+				/* need a NOP */
+				*new_size += 1;
+				*nope_location |= BIT(i);
+				memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE);
+				MLX5_SET(set_action_in, &new_pat[j],
+					 action_type,
+					 MLX5_MODIFICATION_TYPE_NOP);
+				j++;
+			} else if (prev_src_field == src_field) {
+				/* need a NOP */
+				*new_size += 1;
+				*nope_location |= BIT(i);
+				MLX5_SET(set_action_in, &new_pat[j],
+					 action_type,
+					 MLX5_MODIFICATION_TYPE_NOP);
+				j++;
+			}
+		}
+		memcpy(&new_pat[j], &pattern[i], MLX5HWS_MODIFY_ACTION_SIZE);
+		/* check if no more space */
+		if (j > max_actions) {
+			*new_size = num_actions;
+			*nope_location = 0;
+			return;
+		}
+
+		prev_src_field = src_field;
+		prev_dst_field = dst_field;
+	}
+}
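mlx5hws_pat_calc_nope() above pads a modify-header pattern so that two consecutive actions processed as a pair never touch the same field: a NOP is inserted in front of a conflicting action at an odd position, and nope_location records where. A simplified model of that expansion, with illustrative types that are not part of the driver:

enum { ACT_SET, ACT_NOP };

struct act {
	int type;
	int field;
};

/* Returns the expanded length; new_pat must have room for the extra slots. */
static int expand_with_nops(const struct act *pat, int n, struct act *new_pat)
{
	int i, j = 0;

	for (i = 0; i < n; i++) {
		/* An action at an odd index that hits the same field as its
		 * predecessor gets a NOP placed in front of it. */
		if ((i % 2) && pat[i].field == pat[i - 1].field)
			new_pat[j++] = (struct act){ .type = ACT_NOP };
		new_pat[j++] = pat[i];
	}
	return j;
}
/* { SET f1, SET f1, SET f2 } expands to { SET f1, NOP, SET f1, SET f2 }. */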
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pat_arg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pat_arg.h
new file mode 100644
index 000000000000..27ca93385b08
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pat_arg.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#ifndef MLX5HWS_PAT_ARG_H_
+#define MLX5HWS_PAT_ARG_H_
+
+/* Modify-header arg pool */
+enum mlx5hws_arg_chunk_size {
+	MLX5HWS_ARG_CHUNK_SIZE_1,
+	/* Keep MIN updated when changing */
+	MLX5HWS_ARG_CHUNK_SIZE_MIN = MLX5HWS_ARG_CHUNK_SIZE_1,
+	MLX5HWS_ARG_CHUNK_SIZE_2,
+	MLX5HWS_ARG_CHUNK_SIZE_3,
+	MLX5HWS_ARG_CHUNK_SIZE_4,
+	MLX5HWS_ARG_CHUNK_SIZE_MAX,
+};
+
+enum {
+	MLX5HWS_MODIFY_ACTION_SIZE = 8,
+	MLX5HWS_ARG_DATA_SIZE = 64,
+};
+
+struct mlx5hws_pattern_cache {
+	struct mutex lock; /* Protect pattern list */
+	struct list_head ptrn_list;
+};
+
+struct mlx5hws_pattern_cache_item {
+	struct {
+		u32 pattern_id;
+		u8 *data;
+		u16 num_of_actions;
+	} mh_data;
+	u32 refcount;
+	struct list_head ptrn_list_node;
+};
+
+enum mlx5hws_arg_chunk_size
+mlx5hws_arg_get_arg_log_size(u16 num_of_actions);
+
+u32 mlx5hws_arg_get_arg_size(u16 num_of_actions);
+
+enum mlx5hws_arg_chunk_size
+mlx5hws_arg_data_size_to_arg_log_size(u16 data_size);
+
+u32 mlx5hws_arg_data_size_to_arg_size(u16 data_size);
+
+int mlx5hws_pat_init_pattern_cache(struct mlx5hws_pattern_cache **cache);
+
+void mlx5hws_pat_uninit_pattern_cache(struct mlx5hws_pattern_cache *cache);
+
+bool mlx5hws_pat_verify_actions(struct mlx5hws_context *ctx, __be64 pattern[], size_t sz);
+
+int mlx5hws_arg_create(struct mlx5hws_context *ctx,
+		       u8 *data,
+		       size_t data_sz,
+		       u32 log_bulk_sz,
+		       bool write_data,
+		       u32 *arg_id);
+
+void mlx5hws_arg_destroy(struct mlx5hws_context *ctx, u32 arg_id);
+
+int mlx5hws_arg_create_modify_header_arg(struct mlx5hws_context *ctx,
+					 __be64 *data,
+					 u8 num_of_actions,
+					 u32 log_bulk_sz,
+					 bool write_data,
+					 u32 *modify_hdr_arg_id);
+
+int mlx5hws_pat_get_pattern(struct mlx5hws_context *ctx,
+			    __be64 *pattern,
+			    size_t pattern_sz,
+			    u32 *ptrn_id);
+
+void mlx5hws_pat_put_pattern(struct mlx5hws_context *ctx,
+			     u32 ptrn_id);
+
+bool mlx5hws_arg_is_valid_arg_request_size(struct mlx5hws_context *ctx,
+					   u32 arg_size);
+
+bool mlx5hws_pat_require_reparse(__be64 *actions, u16 num_of_actions);
+
+void mlx5hws_arg_write(struct mlx5hws_send_engine *queue,
+		       void *comp_data,
+		       u32 arg_idx,
+		       u8 *arg_data,
+		       size_t data_size);
+
+void mlx5hws_arg_decapl3_write(struct mlx5hws_send_engine *queue,
+			       u32 arg_idx,
+			       u8 *arg_data,
+			       u16 num_of_actions);
+
+int mlx5hws_arg_write_inline_arg_data(struct mlx5hws_context *ctx,
+				      u32 arg_idx,
+				      u8 *arg_data,
+				      size_t data_size);
+
+void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions, size_t max_actions,
+			   size_t *new_size, u32 *nope_location, __be64 *new_pat);
+#endif /* MLX5HWS_PAT_ARG_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pool.c
new file mode 100644
index 000000000000..a8a63e3278be
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pool.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#include "mlx5hws_internal.h"
+#include "mlx5hws_buddy.h"
+
+static void hws_pool_free_one_resource(struct mlx5hws_pool_resource *resource)
+{
+	switch (resource->pool->type) {
+	case MLX5HWS_POOL_TYPE_STE:
+		mlx5hws_cmd_ste_destroy(resource->pool->ctx->mdev, resource->base_id);
+		break;
+	case MLX5HWS_POOL_TYPE_STC:
+		mlx5hws_cmd_stc_destroy(resource->pool->ctx->mdev, resource->base_id);
+		break;
+	default:
+		break;
+	}
+
+	kfree(resource);
+}
+
+static void hws_pool_resource_free(struct mlx5hws_pool *pool,
+				   int resource_idx)
+{
+	hws_pool_free_one_resource(pool->resource[resource_idx]);
+	pool->resource[resource_idx] = NULL;
+
+	if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) {
+		hws_pool_free_one_resource(pool->mirror_resource[resource_idx]);
+		pool->mirror_resource[resource_idx] = NULL;
+	}
+}
+
+static struct mlx5hws_pool_resource *
+hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range,
+			     u32 fw_ft_type)
+{
+	struct mlx5hws_cmd_ste_create_attr ste_attr;
+	struct mlx5hws_cmd_stc_create_attr stc_attr;
+	struct mlx5hws_pool_resource *resource;
+	u32 obj_id = 0;
+	int ret;
+
+	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+	if (!resource)
+		return NULL;
+
+	switch (pool->type) {
+	case MLX5HWS_POOL_TYPE_STE:
+		ste_attr.log_obj_range = log_range;
+		ste_attr.table_type = fw_ft_type;
+		ret = mlx5hws_cmd_ste_create(pool->ctx->mdev, &ste_attr, &obj_id);
+		break;
+	case MLX5HWS_POOL_TYPE_STC:
+		stc_attr.log_obj_range = log_range;
+		stc_attr.table_type = fw_ft_type;
+		ret = mlx5hws_cmd_stc_create(pool->ctx->mdev, &stc_attr, &obj_id);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret) {
+		mlx5hws_err(pool->ctx, "Failed to allocate resource objects\n");
+		goto free_resource;
+	}
+
+	resource->pool = pool;
+	resource->range = 1 << log_range;
+	resource->base_id = obj_id;
+
+	return resource;
+
+free_resource:
+	kfree(resource);
+	return NULL;
+}
+
+static int
+hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx)
+{
+	struct mlx5hws_pool_resource *resource;
+	u32 fw_ft_type, opt_log_range;
+
+	fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, false);
+	opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_range;
+	resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
+	if (!resource) {
+		mlx5hws_err(pool->ctx, "Failed allocating resource\n");
+		return -EINVAL;
+	}
+
+	pool->resource[idx] = resource;
+
+	if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) {
+		struct mlx5hws_pool_resource *mirror_resource;
+
+		fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, true);
+		opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_range;
+		mirror_resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
+		if (!mirror_resource) {
+			mlx5hws_err(pool->ctx, "Failed allocating mirrored resource\n");
+			hws_pool_free_one_resource(resource);
+			pool->resource[idx] = NULL;
+			return -EINVAL;
+		}
+		pool->mirror_resource[idx] = mirror_resource;
+	}
+
+	return 0;
+}
+
+static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range)
+{
+	unsigned long *cur_bmp;
+
+	cur_bmp = bitmap_zalloc(1 << log_range, GFP_KERNEL);
+	if (!cur_bmp)
+		return NULL;
+
+	bitmap_fill(cur_bmp, 1 << log_range);
+
+	return cur_bmp;
+}
+
+static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool,
+					struct mlx5hws_pool_chunk *chunk)
+{
+	struct mlx5hws_buddy_mem *buddy;
+
+	buddy = pool->db.buddy_manager->buddies[chunk->resource_idx];
+	if (!buddy) {
+		mlx5hws_err(pool->ctx, "No such buddy (%d)\n", chunk->resource_idx);
+		return;
+	}
+
+	mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order);
+}
+
+static struct mlx5hws_buddy_mem *
+hws_pool_buddy_get_next_buddy(struct mlx5hws_pool *pool, int idx,
+			      u32 order, bool *is_new_buddy)
+{
+	struct mlx5hws_buddy_mem *buddy;
+	u32 new_buddy_size;
+
+	buddy = pool->db.buddy_manager->buddies[idx];
+	if (buddy)
+		return buddy;
+
+	new_buddy_size = max(pool->alloc_log_sz, order);
+	*is_new_buddy = true;
+	buddy = mlx5hws_buddy_create(new_buddy_size);
+	if (!buddy) {
+		mlx5hws_err(pool->ctx, "Failed to create buddy order: %d index: %d\n",
+			    new_buddy_size, idx);
+		return NULL;
+	}
+
+	if (hws_pool_resource_alloc(pool, new_buddy_size, idx) != 0) {
+		mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
+			    pool->type, new_buddy_size, idx);
+		mlx5hws_buddy_cleanup(buddy);
+		return NULL;
+	}
+
+	pool->db.buddy_manager->buddies[idx] = buddy;
+
+	return buddy;
+}
+
+static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool,
+					int order,
+					u32 *buddy_idx,
+					int *seg)
+{
+	struct mlx5hws_buddy_mem *buddy;
+	bool new_mem = false;
+	int ret = 0;
+	int i;
+
+	*seg = -1;
+
+	/* Find the next free place from the buddy array */
+	while (*seg == -1) {
+		for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
+			buddy = hws_pool_buddy_get_next_buddy(pool, i,
+							      order,
+							      &new_mem);
+			if (!buddy) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			*seg = mlx5hws_buddy_alloc_mem(buddy, order);
+			if (*seg != -1)
+				goto found;
+
+			if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) {
+				mlx5hws_err(pool->ctx,
+					    "Failed to allocate seg for one resource pool\n");
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			if (new_mem) {
+				/* We have a new memory pool; there should be room for us */
+				mlx5hws_err(pool->ctx,
+					    "No memory for order: %d with buddy no: %d\n",
+					    order, i);
+				ret = -ENOMEM;
+				goto out;
+			}
+		}
+	}
+
+found:
+	*buddy_idx = i;
+out:
+	return ret;
+}
+
+static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
+				       struct mlx5hws_pool_chunk *chunk)
+{
+	int ret = 0;
+
+	/* Go over the buddies and find next free slot */
+	ret = hws_pool_buddy_get_mem_chunk(pool, chunk->order,
+					   &chunk->resource_idx,
+					   &chunk->offset);
+	if (ret)
+		mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
+			    chunk->order);
+
+	return ret;
+}
+
+static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool)
+{
+	struct mlx5hws_buddy_mem *buddy;
+	int i;
+
+	for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
+		buddy = pool->db.buddy_manager->buddies[i];
+		if (buddy) {
+			mlx5hws_buddy_cleanup(buddy);
+			kfree(buddy);
+			pool->db.buddy_manager->buddies[i] = NULL;
+		}
+	}
+
+	kfree(pool->db.buddy_manager);
+}
+
+static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range)
+{
+	pool->db.buddy_manager = kzalloc(sizeof(*pool->db.buddy_manager), GFP_KERNEL);
+	if (!pool->db.buddy_manager)
+		return -ENOMEM;
+
+	if (pool->flags & MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE) {
+		bool new_buddy;
+
+		if (!hws_pool_buddy_get_next_buddy(pool, 0, log_range, &new_buddy)) {
+			mlx5hws_err(pool->ctx,
+				    "Failed allocating memory on create log_sz: %d\n", log_range);
+			kfree(pool->db.buddy_manager);
+			return -ENOMEM;
+		}
+	}
+
+	pool->p_db_uninit = &hws_pool_buddy_db_uninit;
+	pool->p_get_chunk = &hws_pool_buddy_db_get_chunk;
+	pool->p_put_chunk = &hws_pool_buddy_db_put_chunk;
+
+	return 0;
+}
+
+static int hws_pool_create_resource_on_index(struct mlx5hws_pool *pool,
+					     u32 alloc_size, int idx)
+{
+	int ret = hws_pool_resource_alloc(pool, alloc_size, idx);
+
+	if (ret) {
+		mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
+			    pool->type, alloc_size, idx);
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct mlx5hws_pool_elements *
+hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order, int idx)
+{
+	struct mlx5hws_pool_elements *elem;
+	u32 alloc_size;
+
+	alloc_size = pool->alloc_log_sz;
+
+	elem = kzalloc(sizeof(*elem), GFP_KERNEL);
+	if (!elem)
+		return NULL;
+
+	/* Sharing the same resource also means that all the elements have size 1 */
+	if ((pool->flags & MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS) &&
+	    !(pool->flags & MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) {
+		/* Currently all chunks are of size 1 */
+		elem->bitmap = hws_pool_create_and_init_bitmap(alloc_size - order);
+		if (!elem->bitmap) {
+			mlx5hws_err(pool->ctx,
+				    "Failed to create bitmap type: %d: size %d index: %d\n",
+				    pool->type, alloc_size, idx);
+			goto free_elem;
+		}
+
+		elem->log_size = alloc_size - order;
+	}
+
+	if (hws_pool_create_resource_on_index(pool, alloc_size, idx)) {
+		mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
+			    pool->type, alloc_size, idx);
+		goto free_db;
+	}
+
+	pool->db.element_manager->elements[idx] = elem;
+
+	return elem;
+
+free_db:
+	bitmap_free(elem->bitmap);
+free_elem:
+	kfree(elem);
+	return NULL;
+}
+
+static int hws_pool_element_find_seg(struct mlx5hws_pool_elements *elem, int *seg)
+{
+	unsigned int segment, size;
+
+	size = 1 << elem->log_size;
+
+	segment = find_first_bit(elem->bitmap, size);
+	if (segment >= size) {
+		elem->is_full = true;
+		return -ENOMEM;
+	}
+
+	bitmap_clear(elem->bitmap, segment, 1);
+	*seg = segment;
+	return 0;
+}
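The element DB above tracks free slots with a bitmap in which a set bit means "free": hws_pool_create_and_init_bitmap() starts with all bits set, find_first_bit() locates a free slot, and bitmap_clear()/bitmap_set() claim and release it. The same discipline in a tiny userspace sketch (illustrative only; the driver uses the kernel bitmap helpers):

#define SLOTS 32			/* stands in for 1 << elem->log_size */

static unsigned long freemap = ~0UL;	/* all slots start free (bits set) */

static int slot_alloc(void)
{
	int i;

	for (i = 0; i < SLOTS; i++) {	/* poor man's find_first_bit() */
		if (freemap & (1UL << i)) {
			freemap &= ~(1UL << i);	/* claim: bitmap_clear() */
			return i;
		}
	}
	return -1;			/* exhausted: elem->is_full */
}

static void slot_free(int i)
{
	freemap |= 1UL << i;		/* release: bitmap_set() */
}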
+static int
+hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
+				       u32 *idx, int *seg)
+{
+	struct mlx5hws_pool_elements *elem;
+
+	elem = pool->db.element_manager->elements[0];
+	if (!elem)
+		elem = hws_pool_element_create_new_elem(pool, order, 0);
+	if (!elem)
+		goto err_no_elem;
+
+	if (hws_pool_element_find_seg(elem, seg) != 0) {
+		mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order);
+		return -ENOMEM;
+	}
+
+	*idx = 0;
+	elem->num_of_elements++;
+	return 0;
+
+err_no_elem:
+	mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order);
+	return -ENOMEM;
+}
+
+static int
+hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
+				       u32 *idx, int *seg)
+{
+	int ret, i;
+
+	for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
+		if (!pool->resource[i]) {
+			ret = hws_pool_create_resource_on_index(pool, order, i);
+			if (ret)
+				goto err_no_res;
+			*idx = i;
+			*seg = 0; /* One memory slot in that element */
+			return 0;
+		}
+	}
+
+	mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order);
+	return -ENOMEM;
+
+err_no_res:
+	mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order);
+	return -ENOMEM;
+}
+
+static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool,
+						 struct mlx5hws_pool_chunk *chunk)
+{
+	int ret;
+
+	/* Go over all memory elements and find/allocate free slot */
+	ret = hws_pool_general_element_get_mem_chunk(pool, chunk->order,
+						     &chunk->resource_idx,
+						     &chunk->offset);
+	if (ret)
+		mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
+			    chunk->order);
+
+	return ret;
+}
+
+static void hws_pool_general_element_db_put_chunk(struct mlx5hws_pool *pool,
+						  struct mlx5hws_pool_chunk *chunk)
+{
+	if (unlikely(!pool->resource[chunk->resource_idx]))
+		pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
+
+	if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE)
+		hws_pool_resource_free(pool, chunk->resource_idx);
+}
+
+static void hws_pool_general_element_db_uninit(struct mlx5hws_pool *pool)
+{
+	(void)pool;
+}
+
+/* This memory management works as follows:
+ * - At start, no memory is allocated at all.
+ * - When a new chunk request arrives, a resource is allocated for it.
+ * - When that chunk is freed, the resource is freed as well.
+ */
+static int hws_pool_general_element_db_init(struct mlx5hws_pool *pool)
+{
+	pool->p_db_uninit = &hws_pool_general_element_db_uninit;
+	pool->p_get_chunk = &hws_pool_general_element_db_get_chunk;
+	pool->p_put_chunk = &hws_pool_general_element_db_put_chunk;
+
+	return 0;
+}
+
+static void hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool,
+						   struct mlx5hws_pool_elements *elem,
+						   struct mlx5hws_pool_chunk *chunk)
+{
+	if (unlikely(!pool->resource[chunk->resource_idx]))
+		pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
+
+	hws_pool_resource_free(pool, chunk->resource_idx);
+	kfree(elem);
+	pool->db.element_manager->elements[chunk->resource_idx] = NULL;
+}
+
+static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool,
+					     struct mlx5hws_pool_chunk *chunk)
+{
+	struct mlx5hws_pool_elements *elem;
+
+	if (unlikely(chunk->resource_idx))
+		pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
+
+	elem = pool->db.element_manager->elements[chunk->resource_idx];
+	if (!elem) {
+		mlx5hws_err(pool->ctx, "No such element (%d)\n", chunk->resource_idx);
+		return;
+	}
+
+	bitmap_set(elem->bitmap, chunk->offset, 1);
+	elem->is_full = false;
+	elem->num_of_elements--;
+
+	if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE &&
+	    !elem->num_of_elements)
+		hws_onesize_element_db_destroy_element(pool, elem, chunk);
+}
+
+static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool,
+					    struct mlx5hws_pool_chunk *chunk)
+{
+	int ret = 0;
+
+	/* Go over all memory elements and find/allocate free slot */
+	ret = hws_pool_onesize_element_get_mem_chunk(pool, chunk->order,
+						     &chunk->resource_idx,
+						     &chunk->offset);
+	if (ret)
+		mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
+			    chunk->order);
+
+	return ret;
+}
+
+static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool)
+{
+	struct mlx5hws_pool_elements *elem;
+	int i;
+
+	for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
+		elem = pool->db.element_manager->elements[i];
+		if (elem) {
+			bitmap_free(elem->bitmap);
+			kfree(elem);
+			pool->db.element_manager->elements[i] = NULL;
+		}
+	}
+	kfree(pool->db.element_manager);
+}
+
+/* This memory management works as follows:
+ * - At start, no memory is allocated at all.
+ * - When a new chunk request arrives, allocate from the first and only
+ *   memory resource; once it is exhausted, return an error.
+ */
+static int hws_pool_onesize_element_db_init(struct mlx5hws_pool *pool)
+{
+	pool->db.element_manager = kzalloc(sizeof(*pool->db.element_manager), GFP_KERNEL);
+	if (!pool->db.element_manager)
+		return -ENOMEM;
+
+	pool->p_db_uninit = &hws_onesize_element_db_uninit;
+	pool->p_get_chunk = &hws_onesize_element_db_get_chunk;
+	pool->p_put_chunk = &hws_onesize_element_db_put_chunk;
+
+	return 0;
+}
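hws_onesize_element_db_put_chunk() above effectively reference-counts the element through num_of_elements: once the last chunk is returned and MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE is set, the whole backing FW resource is destroyed. A minimal model of that policy, with illustrative types that are not part of the driver:

#include <stdbool.h>

struct elem_model {
	unsigned int num_of_elements;	/* chunks currently handed out */
	bool is_full;
};

/* Returns true when the caller should also destroy the backing resource,
 * mirroring the MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE handling above. */
static bool elem_put(struct elem_model *e, bool release_free_resource)
{
	e->is_full = false;
	return release_free_resource && --e->num_of_elements == 0;
}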
+static int hws_pool_db_init(struct mlx5hws_pool *pool,
+			    enum mlx5hws_db_type db_type)
+{
+	int ret;
+
+	if (db_type == MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE)
+		ret = hws_pool_general_element_db_init(pool);
+	else if (db_type == MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE)
+		ret = hws_pool_onesize_element_db_init(pool);
+	else
+		ret = hws_pool_buddy_db_init(pool, pool->alloc_log_sz);
+
+	if (ret) {
+		mlx5hws_err(pool->ctx, "Failed to init db type %d (ret: %d)\n", db_type, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void hws_pool_db_unint(struct mlx5hws_pool *pool)
+{
+	pool->p_db_uninit(pool);
+}
+
+int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
+			     struct mlx5hws_pool_chunk *chunk)
+{
+	int ret;
+
+	mutex_lock(&pool->lock);
+	ret = pool->p_get_chunk(pool, chunk);
+	mutex_unlock(&pool->lock);
+
+	return ret;
+}
+
+void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool,
+			     struct mlx5hws_pool_chunk *chunk)
+{
+	mutex_lock(&pool->lock);
+	pool->p_put_chunk(pool, chunk);
+	mutex_unlock(&pool->lock);
+}
+
+struct mlx5hws_pool *
+mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_attr)
+{
+	enum mlx5hws_db_type res_db_type;
+	struct mlx5hws_pool *pool;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return NULL;
+
+	pool->ctx = ctx;
+	pool->type = pool_attr->pool_type;
+	pool->alloc_log_sz = pool_attr->alloc_log_sz;
+	pool->flags = pool_attr->flags;
+	pool->tbl_type = pool_attr->table_type;
+	pool->opt_type = pool_attr->opt_type;
+
+	/* Support general db */
+	if (pool->flags == (MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE |
+			    MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK))
+		res_db_type = MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE;
+	else if (pool->flags == (MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
+				 MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS))
+		res_db_type = MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE;
+	else
+		res_db_type = MLX5HWS_POOL_DB_TYPE_BUDDY;
+
+	if (hws_pool_db_init(pool, res_db_type))
+		goto free_pool;
+
+	mutex_init(&pool->lock);
+
+	return pool;
+
+free_pool:
+	kfree(pool);
+	return NULL;
+}
+
+int mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
+{
+	int i;
+
+	mutex_destroy(&pool->lock);
+
+	for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++)
+		if (pool->resource[i])
+			hws_pool_resource_free(pool, i);
+
+	hws_pool_db_unint(pool);
+
+	kfree(pool);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pool.h
new file mode 100644
index 000000000000..621298b352b2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_pool.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#ifndef MLX5HWS_POOL_H_
+#define MLX5HWS_POOL_H_
+
+#define MLX5HWS_POOL_STC_LOG_SZ 15
+
+#define MLX5HWS_POOL_RESOURCE_ARR_SZ 100
+
+enum mlx5hws_pool_type {
+	MLX5HWS_POOL_TYPE_STE,
+	MLX5HWS_POOL_TYPE_STC,
+};
+
+struct mlx5hws_pool_chunk {
+	u32 resource_idx;
+	/* Internal offset, relative to base index */
+	int offset;
+	int order;
+};
+
+struct mlx5hws_pool_resource {
+	struct mlx5hws_pool *pool;
+	u32 base_id;
+	u32 range;
+};
+
+enum mlx5hws_pool_flags {
+	/* Only one resource in that pool */
+	MLX5HWS_POOL_FLAGS_ONE_RESOURCE = 1 << 0,
+	MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE = 1 << 1,
+	/* No sharing of resources between chunks */
+	MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK = 1 << 2,
+	/* All objects are of the same size */
+	MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS = 1 << 3,
+	/* Managed by buddy allocator */
+	MLX5HWS_POOL_FLAGS_BUDDY_MANAGED = 1 << 4,
+	/* Allocate pool_type memory on pool creation */
+	MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE = 1 << 5,
+
+	/* These values should be used by the caller */
+	MLX5HWS_POOL_FLAGS_FOR_STC_POOL =
+		MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
+		MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS,
+	MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL =
+		MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE |
+		MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK,
+	MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL =
+		MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
+		MLX5HWS_POOL_FLAGS_BUDDY_MANAGED |
+		MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE,
+};
+
+enum mlx5hws_pool_optimize {
+	MLX5HWS_POOL_OPTIMIZE_NONE = 0x0,
+	MLX5HWS_POOL_OPTIMIZE_ORIG = 0x1,
+	MLX5HWS_POOL_OPTIMIZE_MIRROR = 0x2,
+};
+
+struct mlx5hws_pool_attr {
+	enum mlx5hws_pool_type pool_type;
+	enum mlx5hws_table_type table_type;
+	enum mlx5hws_pool_flags flags;
+	enum mlx5hws_pool_optimize opt_type;
+	/* Allocation size once memory is depleted */
+	size_t alloc_log_sz;
+};
+
+enum mlx5hws_db_type {
+	/* Used for allocating big chunks of memory; each element has its own resource in the FW */
+	MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE,
+	/* One resource only; all the elements have the same size */
+	MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE,
+	/* Many resources; the memory is allocated with a buddy mechanism */
+	MLX5HWS_POOL_DB_TYPE_BUDDY,
+};
+
+struct mlx5hws_buddy_manager {
+	struct mlx5hws_buddy_mem *buddies[MLX5HWS_POOL_RESOURCE_ARR_SZ];
+};
+
+struct mlx5hws_pool_elements {
+	u32 num_of_elements;
+	unsigned long *bitmap;
+	u32 log_size;
+	bool is_full;
+};
+
+struct mlx5hws_element_manager {
+	struct mlx5hws_pool_elements *elements[MLX5HWS_POOL_RESOURCE_ARR_SZ];
+};
+
+struct mlx5hws_pool_db {
+	enum mlx5hws_db_type type;
+	union {
+		struct mlx5hws_element_manager *element_manager;
+		struct mlx5hws_buddy_manager *buddy_manager;
+	};
+};
+
+typedef int (*mlx5hws_pool_db_get_chunk)(struct mlx5hws_pool *pool,
+					 struct mlx5hws_pool_chunk *chunk);
+typedef void (*mlx5hws_pool_db_put_chunk)(struct mlx5hws_pool *pool,
+					  struct mlx5hws_pool_chunk *chunk);
+typedef void (*mlx5hws_pool_unint_db)(struct mlx5hws_pool *pool);
+
+struct mlx5hws_pool {
+	struct mlx5hws_context *ctx;
+	enum mlx5hws_pool_type type;
+	enum mlx5hws_pool_flags flags;
+	struct mutex lock; /* protect the pool */
+	size_t alloc_log_sz;
+	enum mlx5hws_table_type tbl_type;
+	enum mlx5hws_pool_optimize opt_type;
+	struct mlx5hws_pool_resource *resource[MLX5HWS_POOL_RESOURCE_ARR_SZ];
+	struct mlx5hws_pool_resource *mirror_resource[MLX5HWS_POOL_RESOURCE_ARR_SZ];
+	/* DB */
+	struct mlx5hws_pool_db db;
+	/* Functions */
+	mlx5hws_pool_unint_db p_db_uninit;
+	mlx5hws_pool_db_get_chunk p_get_chunk;
+	mlx5hws_pool_db_put_chunk p_put_chunk;
+};
+
+struct mlx5hws_pool *
+mlx5hws_pool_create(struct mlx5hws_context *ctx,
+		    struct mlx5hws_pool_attr *pool_attr);
+
+int mlx5hws_pool_destroy(struct mlx5hws_pool *pool);
+
+int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
+			     struct mlx5hws_pool_chunk *chunk);
+
+void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool,
+			     struct mlx5hws_pool_chunk *chunk);
+
+static inline u32
+mlx5hws_pool_chunk_get_base_id(struct mlx5hws_pool *pool,
+			       struct mlx5hws_pool_chunk *chunk)
+{
+	return pool->resource[chunk->resource_idx]->base_id;
+}
+
+static inline u32
+mlx5hws_pool_chunk_get_base_mirror_id(struct mlx5hws_pool *pool,
+				      struct mlx5hws_pool_chunk *chunk)
+{
+	return pool->mirror_resource[chunk->resource_idx]->base_id;
+}
+#endif /* MLX5HWS_POOL_H_ */
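Putting the interface just declared together, a hypothetical caller would create a pool, grab a chunk, translate it to a FW object id, and tear everything down. A sketch under assumed values (alloc_log_sz and order are made up for illustration, and error handling is trimmed):

static int pool_usage_sketch(struct mlx5hws_context *ctx)
{
	struct mlx5hws_pool_attr attr = {
		.pool_type = MLX5HWS_POOL_TYPE_STE,
		.table_type = MLX5HWS_TABLE_TYPE_FDB,
		.flags = MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL,
		.alloc_log_sz = 10,	/* grow by 2^10 entries when depleted */
	};
	struct mlx5hws_pool_chunk chunk = { .order = 4 };	/* 2^4 = 16 STEs */
	struct mlx5hws_pool *pool;
	u32 ste_id;
	int ret;

	pool = mlx5hws_pool_create(ctx, &attr);
	if (!pool)
		return -ENOMEM;

	ret = mlx5hws_pool_chunk_alloc(pool, &chunk);
	if (ret)
		goto out;

	/* FW object id of the first STE in the chunk */
	ste_id = mlx5hws_pool_chunk_get_base_id(pool, &chunk) + chunk.offset;
	(void)ste_id;

	mlx5hws_pool_chunk_free(pool, &chunk);
out:
	mlx5hws_pool_destroy(pool);
	return ret;
}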
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_prm.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_prm.h
new file mode 100644
index 000000000000..de92cecbeb92
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_prm.h
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#ifndef MLX5_PRM_H_
+#define MLX5_PRM_H_
+
+#define MLX5_MAX_ACTIONS_DATA_IN_HEADER_MODIFY 512
+
+/* Action type of header modification. */
+enum {
+	MLX5_MODIFICATION_TYPE_SET = 0x1,
+	MLX5_MODIFICATION_TYPE_ADD = 0x2,
+	MLX5_MODIFICATION_TYPE_COPY = 0x3,
+	MLX5_MODIFICATION_TYPE_INSERT = 0x4,
+	MLX5_MODIFICATION_TYPE_REMOVE = 0x5,
+	MLX5_MODIFICATION_TYPE_NOP = 0x6,
+	MLX5_MODIFICATION_TYPE_REMOVE_WORDS = 0x7,
+	MLX5_MODIFICATION_TYPE_ADD_FIELD = 0x8,
+	MLX5_MODIFICATION_TYPE_MAX,
+};
+
+/* The field of packet to be modified. */
+enum mlx5_modification_field {
+	MLX5_MODI_OUT_NONE = -1,
+	MLX5_MODI_OUT_SMAC_47_16 = 1,
+	MLX5_MODI_OUT_SMAC_15_0,
+	MLX5_MODI_OUT_ETHERTYPE,
+	MLX5_MODI_OUT_DMAC_47_16,
+	MLX5_MODI_OUT_DMAC_15_0,
+	MLX5_MODI_OUT_IP_DSCP,
+	MLX5_MODI_OUT_TCP_FLAGS,
+	MLX5_MODI_OUT_TCP_SPORT,
+	MLX5_MODI_OUT_TCP_DPORT,
+	MLX5_MODI_OUT_IPV4_TTL,
+	MLX5_MODI_OUT_UDP_SPORT,
+	MLX5_MODI_OUT_UDP_DPORT,
+	MLX5_MODI_OUT_SIPV6_127_96,
+	MLX5_MODI_OUT_SIPV6_95_64,
+	MLX5_MODI_OUT_SIPV6_63_32,
+	MLX5_MODI_OUT_SIPV6_31_0,
+	MLX5_MODI_OUT_DIPV6_127_96,
+	MLX5_MODI_OUT_DIPV6_95_64,
+	MLX5_MODI_OUT_DIPV6_63_32,
+	MLX5_MODI_OUT_DIPV6_31_0,
+	MLX5_MODI_OUT_SIPV4,
+	MLX5_MODI_OUT_DIPV4,
+	MLX5_MODI_OUT_FIRST_VID,
+	MLX5_MODI_IN_SMAC_47_16 = 0x31,
+	MLX5_MODI_IN_SMAC_15_0,
+	MLX5_MODI_IN_ETHERTYPE,
+	MLX5_MODI_IN_DMAC_47_16,
+	MLX5_MODI_IN_DMAC_15_0,
+	MLX5_MODI_IN_IP_DSCP,
+	MLX5_MODI_IN_TCP_FLAGS,
+	MLX5_MODI_IN_TCP_SPORT,
+	MLX5_MODI_IN_TCP_DPORT,
+	MLX5_MODI_IN_IPV4_TTL,
+	MLX5_MODI_IN_UDP_SPORT,
+	MLX5_MODI_IN_UDP_DPORT,
+	MLX5_MODI_IN_SIPV6_127_96,
+	MLX5_MODI_IN_SIPV6_95_64,
+	MLX5_MODI_IN_SIPV6_63_32,
+	MLX5_MODI_IN_SIPV6_31_0,
+	MLX5_MODI_IN_DIPV6_127_96,
+	MLX5_MODI_IN_DIPV6_95_64,
+	MLX5_MODI_IN_DIPV6_63_32,
+	MLX5_MODI_IN_DIPV6_31_0,
+	MLX5_MODI_IN_SIPV4,
+	MLX5_MODI_IN_DIPV4,
+	MLX5_MODI_OUT_IPV6_HOPLIMIT,
+	MLX5_MODI_IN_IPV6_HOPLIMIT,
+	MLX5_MODI_META_DATA_REG_A,
+	MLX5_MODI_META_DATA_REG_B = 0x50,
+	MLX5_MODI_META_REG_C_0,
+	MLX5_MODI_META_REG_C_1,
+	MLX5_MODI_META_REG_C_2,
+	MLX5_MODI_META_REG_C_3,
+	MLX5_MODI_META_REG_C_4,
+	MLX5_MODI_META_REG_C_5,
+	MLX5_MODI_META_REG_C_6,
+	MLX5_MODI_META_REG_C_7,
+	MLX5_MODI_OUT_TCP_SEQ_NUM,
+	MLX5_MODI_IN_TCP_SEQ_NUM,
+	MLX5_MODI_OUT_TCP_ACK_NUM,
+	MLX5_MODI_IN_TCP_ACK_NUM = 0x5C,
+	MLX5_MODI_GTP_TEID = 0x6E,
+	MLX5_MODI_OUT_IP_ECN = 0x73,
+	MLX5_MODI_TUNNEL_HDR_DW_1 = 0x75,
+	MLX5_MODI_GTPU_FIRST_EXT_DW_0 = 0x76,
+	MLX5_MODI_HASH_RESULT = 0x81,
+	MLX5_MODI_IN_MPLS_LABEL_0 = 0x8a,
+	MLX5_MODI_IN_MPLS_LABEL_1,
+	MLX5_MODI_IN_MPLS_LABEL_2,
+	MLX5_MODI_IN_MPLS_LABEL_3,
+	MLX5_MODI_IN_MPLS_LABEL_4,
+	MLX5_MODI_OUT_IP_PROTOCOL = 0x4A,
+	MLX5_MODI_OUT_IPV6_NEXT_HDR = 0x4A,
+	MLX5_MODI_META_REG_C_8
= 0x8F, + MLX5_MODI_META_REG_C_9 = 0x90, + MLX5_MODI_META_REG_C_10 = 0x91, + MLX5_MODI_META_REG_C_11 = 0x92, + MLX5_MODI_META_REG_C_12 = 0x93, + MLX5_MODI_META_REG_C_13 = 0x94, + MLX5_MODI_META_REG_C_14 = 0x95, + MLX5_MODI_META_REG_C_15 = 0x96, + MLX5_MODI_OUT_IPV4_TOTAL_LEN = 0x11D, + MLX5_MODI_OUT_IPV6_PAYLOAD_LEN = 0x11E, + MLX5_MODI_OUT_IPV4_IHL = 0x11F, + MLX5_MODI_OUT_TCP_DATA_OFFSET = 0x120, + MLX5_MODI_OUT_ESP_SPI = 0x5E, + MLX5_MODI_OUT_ESP_SEQ_NUM = 0x82, + MLX5_MODI_OUT_IPSEC_NEXT_HDR = 0x126, + MLX5_MODI_INVALID = INT_MAX, +}; + +enum { + MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1, + MLX5_GET_HCA_CAP_OP_MOD_ESW_FLOW_TABLE = 0x8 << 1, + MLX5_SET_HCA_CAP_OP_MOD_ESW = 0x9 << 1, + MLX5_GET_HCA_CAP_OP_MOD_WQE_BASED_FLOW_TABLE = 0x1B << 1, + MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1, +}; + +enum mlx5_ifc_rtc_update_mode { + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH = 0x0, + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET = 0x1, +}; + +enum mlx5_ifc_rtc_access_mode { + MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH = 0x0, + MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR = 0x1, +}; + +enum mlx5_ifc_rtc_ste_format { + MLX5_IFC_RTC_STE_FORMAT_8DW = 0x4, + MLX5_IFC_RTC_STE_FORMAT_11DW = 0x5, + MLX5_IFC_RTC_STE_FORMAT_RANGE = 0x7, +}; + +enum mlx5_ifc_rtc_reparse_mode { + MLX5_IFC_RTC_REPARSE_NEVER = 0x0, + MLX5_IFC_RTC_REPARSE_ALWAYS = 0x1, + MLX5_IFC_RTC_REPARSE_BY_STC = 0x2, +}; + +#define MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX 16 + +struct mlx5_ifc_rtc_bits { + u8 modify_field_select[0x40]; + u8 reserved_at_40[0x40]; + u8 update_index_mode[0x2]; + u8 reparse_mode[0x2]; + u8 num_match_ste[0x4]; + u8 pd[0x18]; + u8 reserved_at_a0[0x9]; + u8 access_index_mode[0x3]; + u8 num_hash_definer[0x4]; + u8 update_method[0x1]; + u8 reserved_at_b1[0x2]; + u8 log_depth[0x5]; + u8 log_hash_size[0x8]; + u8 ste_format_0[0x8]; + u8 table_type[0x8]; + u8 ste_format_1[0x8]; + u8 reserved_at_d8[0x8]; + u8 match_definer_0[0x20]; + u8 stc_id[0x20]; + u8 ste_table_base_id[0x20]; + u8 ste_table_offset[0x20]; + u8 reserved_at_160[0x8]; + u8 miss_flow_table_id[0x18]; + u8 match_definer_1[0x20]; + u8 reserved_at_1a0[0x260]; +}; + +enum mlx5_ifc_stc_action_type { + MLX5_IFC_STC_ACTION_TYPE_NOP = 0x00, + MLX5_IFC_STC_ACTION_TYPE_COPY = 0x05, + MLX5_IFC_STC_ACTION_TYPE_SET = 0x06, + MLX5_IFC_STC_ACTION_TYPE_ADD = 0x07, + MLX5_IFC_STC_ACTION_TYPE_REMOVE_WORDS = 0x08, + MLX5_IFC_STC_ACTION_TYPE_HEADER_REMOVE = 0x09, + MLX5_IFC_STC_ACTION_TYPE_HEADER_INSERT = 0x0b, + MLX5_IFC_STC_ACTION_TYPE_TAG = 0x0c, + MLX5_IFC_STC_ACTION_TYPE_ACC_MODIFY_LIST = 0x0e, + MLX5_IFC_STC_ACTION_TYPE_CRYPTO_IPSEC_ENCRYPTION = 0x10, + MLX5_IFC_STC_ACTION_TYPE_CRYPTO_IPSEC_DECRYPTION = 0x11, + MLX5_IFC_STC_ACTION_TYPE_ASO = 0x12, + MLX5_IFC_STC_ACTION_TYPE_TRAILER = 0x13, + MLX5_IFC_STC_ACTION_TYPE_COUNTER = 0x14, + MLX5_IFC_STC_ACTION_TYPE_ADD_FIELD = 0x1b, + MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE = 0x80, + MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_TIR = 0x81, + MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_FT = 0x82, + MLX5_IFC_STC_ACTION_TYPE_DROP = 0x83, + MLX5_IFC_STC_ACTION_TYPE_ALLOW = 0x84, + MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_VPORT = 0x85, + MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_UPLINK = 0x86, +}; + +enum mlx5_ifc_stc_reparse_mode { + MLX5_IFC_STC_REPARSE_IGNORE = 0x0, + MLX5_IFC_STC_REPARSE_NEVER = 0x1, + MLX5_IFC_STC_REPARSE_ALWAYS = 0x2, +}; + +struct mlx5_ifc_stc_ste_param_ste_table_bits { + u8 ste_obj_id[0x20]; + u8 match_definer_id[0x20]; + u8 reserved_at_40[0x3]; + u8 log_hash_size[0x5]; + u8 reserved_at_48[0x38]; +}; + +struct mlx5_ifc_stc_ste_param_tir_bits { + u8 
reserved_at_0[0x8]; + u8 tirn[0x18]; + u8 reserved_at_20[0x60]; +}; + +struct mlx5_ifc_stc_ste_param_table_bits { + u8 reserved_at_0[0x8]; + u8 table_id[0x18]; + u8 reserved_at_20[0x60]; +}; + +struct mlx5_ifc_stc_ste_param_flow_counter_bits { + u8 flow_counter_id[0x20]; +}; + +enum { + MLX5_ASO_CT_NUM_PER_OBJ = 1, + MLX5_ASO_METER_NUM_PER_OBJ = 2, + MLX5_ASO_IPSEC_NUM_PER_OBJ = 1, + MLX5_ASO_FIRST_HIT_NUM_PER_OBJ = 512, +}; + +struct mlx5_ifc_stc_ste_param_execute_aso_bits { + u8 aso_object_id[0x20]; + u8 return_reg_id[0x4]; + u8 aso_type[0x4]; + u8 reserved_at_28[0x18]; +}; + +struct mlx5_ifc_stc_ste_param_ipsec_encrypt_bits { + u8 ipsec_object_id[0x20]; +}; + +struct mlx5_ifc_stc_ste_param_ipsec_decrypt_bits { + u8 ipsec_object_id[0x20]; +}; + +struct mlx5_ifc_stc_ste_param_trailer_bits { + u8 reserved_at_0[0x8]; + u8 command[0x4]; + u8 reserved_at_c[0x2]; + u8 type[0x2]; + u8 reserved_at_10[0xa]; + u8 length[0x6]; +}; + +struct mlx5_ifc_stc_ste_param_header_modify_list_bits { + u8 header_modify_pattern_id[0x20]; + u8 header_modify_argument_id[0x20]; +}; + +enum mlx5_ifc_header_anchors { + MLX5_HEADER_ANCHOR_PACKET_START = 0x0, + MLX5_HEADER_ANCHOR_MAC = 0x1, + MLX5_HEADER_ANCHOR_FIRST_VLAN_START = 0x2, + MLX5_HEADER_ANCHOR_IPV6_IPV4 = 0x07, + MLX5_HEADER_ANCHOR_ESP = 0x08, + MLX5_HEADER_ANCHOR_TCP_UDP = 0x09, + MLX5_HEADER_ANCHOR_TUNNEL_HEADER = 0x0a, + MLX5_HEADER_ANCHOR_INNER_MAC = 0x13, + MLX5_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, + MLX5_HEADER_ANCHOR_INNER_TCP_UDP = 0x1a, + MLX5_HEADER_ANCHOR_L4_PAYLOAD = 0x1b, + MLX5_HEADER_ANCHOR_INNER_L4_PAYLOAD = 0x1c +}; + +struct mlx5_ifc_stc_ste_param_remove_bits { + u8 action_type[0x4]; + u8 decap[0x1]; + u8 reserved_at_5[0x5]; + u8 remove_start_anchor[0x6]; + u8 reserved_at_10[0x2]; + u8 remove_end_anchor[0x6]; + u8 reserved_at_18[0x8]; +}; + +struct mlx5_ifc_stc_ste_param_remove_words_bits { + u8 action_type[0x4]; + u8 reserved_at_4[0x6]; + u8 remove_start_anchor[0x6]; + u8 reserved_at_10[0x1]; + u8 remove_offset[0x7]; + u8 reserved_at_18[0x2]; + u8 remove_size[0x6]; +}; + +struct mlx5_ifc_stc_ste_param_insert_bits { + u8 action_type[0x4]; + u8 encap[0x1]; + u8 inline_data[0x1]; + u8 reserved_at_6[0x4]; + u8 insert_anchor[0x6]; + u8 reserved_at_10[0x1]; + u8 insert_offset[0x7]; + u8 reserved_at_18[0x1]; + u8 insert_size[0x7]; + u8 insert_argument[0x20]; +}; + +struct mlx5_ifc_stc_ste_param_vport_bits { + u8 eswitch_owner_vhca_id[0x10]; + u8 vport_number[0x10]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 reserved_at_21[0x5f]; +}; + +union mlx5_ifc_stc_param_bits { + struct mlx5_ifc_stc_ste_param_ste_table_bits ste_table; + struct mlx5_ifc_stc_ste_param_tir_bits tir; + struct mlx5_ifc_stc_ste_param_table_bits table; + struct mlx5_ifc_stc_ste_param_flow_counter_bits counter; + struct mlx5_ifc_stc_ste_param_header_modify_list_bits modify_header; + struct mlx5_ifc_stc_ste_param_execute_aso_bits aso; + struct mlx5_ifc_stc_ste_param_remove_bits remove_header; + struct mlx5_ifc_stc_ste_param_insert_bits insert_header; + struct mlx5_ifc_set_action_in_bits add; + struct mlx5_ifc_set_action_in_bits set; + struct mlx5_ifc_copy_action_in_bits copy; + struct mlx5_ifc_stc_ste_param_vport_bits vport; + struct mlx5_ifc_stc_ste_param_ipsec_encrypt_bits ipsec_encrypt; + struct mlx5_ifc_stc_ste_param_ipsec_decrypt_bits ipsec_decrypt; + struct mlx5_ifc_stc_ste_param_trailer_bits trailer; + u8 reserved_at_0[0x80]; +}; + +enum { + MLX5_IFC_MODIFY_STC_FIELD_SELECT_NEW_STC = BIT(0), +}; + +struct mlx5_ifc_stc_bits { + u8 modify_field_select[0x40]; + u8 
reserved_at_40[0x46]; + u8 reparse_mode[0x2]; + u8 table_type[0x8]; + u8 ste_action_offset[0x8]; + u8 action_type[0x8]; + u8 reserved_at_a0[0x60]; + union mlx5_ifc_stc_param_bits stc_param; + u8 reserved_at_180[0x280]; +}; + +struct mlx5_ifc_ste_bits { + u8 modify_field_select[0x40]; + u8 reserved_at_40[0x48]; + u8 table_type[0x8]; + u8 reserved_at_90[0x370]; +}; + +struct mlx5_ifc_definer_bits { + u8 modify_field_select[0x40]; + u8 reserved_at_40[0x50]; + u8 format_id[0x10]; + u8 reserved_at_60[0x60]; + u8 format_select_dw3[0x8]; + u8 format_select_dw2[0x8]; + u8 format_select_dw1[0x8]; + u8 format_select_dw0[0x8]; + u8 format_select_dw7[0x8]; + u8 format_select_dw6[0x8]; + u8 format_select_dw5[0x8]; + u8 format_select_dw4[0x8]; + u8 reserved_at_100[0x18]; + u8 format_select_dw8[0x8]; + u8 reserved_at_120[0x20]; + u8 format_select_byte3[0x8]; + u8 format_select_byte2[0x8]; + u8 format_select_byte1[0x8]; + u8 format_select_byte0[0x8]; + u8 format_select_byte7[0x8]; + u8 format_select_byte6[0x8]; + u8 format_select_byte5[0x8]; + u8 format_select_byte4[0x8]; + u8 reserved_at_180[0x40]; + u8 ctrl[0xa0]; + u8 match_mask[0x160]; +}; + +struct mlx5_ifc_arg_bits { + u8 rsvd0[0x88]; + u8 access_pd[0x18]; +}; + +struct mlx5_ifc_header_modify_pattern_in_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x40]; + + u8 pattern_length[0x8]; + u8 reserved_at_88[0x18]; + + u8 reserved_at_a0[0x60]; + + u8 pattern_data[MLX5_MAX_ACTIONS_DATA_IN_HEADER_MODIFY * 8]; +}; + +struct mlx5_ifc_create_rtc_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_rtc_bits rtc; +}; + +struct mlx5_ifc_create_stc_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_stc_bits stc; +}; + +struct mlx5_ifc_create_ste_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_ste_bits ste; +}; + +struct mlx5_ifc_create_definer_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_definer_bits definer; +}; + +struct mlx5_ifc_create_arg_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_arg_bits arg; +}; + +struct mlx5_ifc_create_header_modify_pattern_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_header_modify_pattern_in_bits pattern; +}; + +struct mlx5_ifc_generate_wqe_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mode[0x10]; + u8 reserved_at_40[0x40]; + u8 reserved_at_80[0x8]; + u8 pdn[0x18]; + u8 reserved_at_a0[0x160]; + u8 wqe_ctrl[0x80]; + u8 wqe_gta_ctrl[0x180]; + u8 wqe_gta_data_0[0x200]; + u8 wqe_gta_data_1[0x200]; +}; + +struct mlx5_ifc_generate_wqe_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x1c0]; + u8 cqe_data[0x200]; +}; + +enum mlx5_access_aso_opc_mod { + ASO_OPC_MOD_IPSEC = 0x0, + ASO_OPC_MOD_CONNECTION_TRACKING = 0x1, + ASO_OPC_MOD_POLICER = 0x2, + ASO_OPC_MOD_RACE_AVOIDANCE = 0x3, + ASO_OPC_MOD_FLOW_HIT = 0x4, +}; + +enum { + MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION = BIT(0), + MLX5_IFC_MODIFY_FLOW_TABLE_RTC_ID = BIT(1), +}; + +enum { + MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_DEFAULT = 0, + MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL = 1, +}; + +struct mlx5_ifc_alloc_packet_reformat_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 packet_reformat_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_dealloc_packet_reformat_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 
op_mod[0x10]; + + u8 packet_reformat_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_dealloc_packet_reformat_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +#endif /* MLX5_PRM_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_rule.c new file mode 100644 index 000000000000..8a011b958b43 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_rule.c @@ -0,0 +1,780 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" + +static void hws_rule_skip(struct mlx5hws_matcher *matcher, + struct mlx5hws_match_template *mt, + u32 flow_source, + bool *skip_rx, bool *skip_tx) +{ + /* By default FDB rules are added to both RX and TX */ + *skip_rx = false; + *skip_tx = false; + + if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) { + *skip_rx = true; + } else if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) { + *skip_tx = true; + } else { + /* If no flow source was set for current rule, + * check for flow source in matcher attributes. + */ + if (matcher->attr.optimize_flow_src) { + *skip_tx = + matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE; + *skip_rx = + matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT; + return; + } + } +} + +static void +hws_rule_update_copy_tag(struct mlx5hws_rule *rule, + struct mlx5hws_wqe_gta_data_seg_ste *wqe_data, + bool is_jumbo) +{ + struct mlx5hws_rule_match_tag *tag; + + if (!mlx5hws_matcher_is_resizable(rule->matcher)) { + tag = &rule->tag; + } else { + struct mlx5hws_wqe_gta_data_seg_ste *data_seg = + (struct mlx5hws_wqe_gta_data_seg_ste *)(void *)rule->resize_info->data_seg; + tag = (struct mlx5hws_rule_match_tag *)(void *)data_seg->action; + } + + if (is_jumbo) + memcpy(wqe_data->jumbo, tag->jumbo, MLX5HWS_JUMBO_TAG_SZ); + else + memcpy(wqe_data->tag, tag->match, MLX5HWS_MATCH_TAG_SZ); +} + +static void hws_rule_init_dep_wqe(struct mlx5hws_send_ring_dep_wqe *dep_wqe, + struct mlx5hws_rule *rule, + struct mlx5hws_match_template *mt, + struct mlx5hws_rule_attr *attr) +{ + struct mlx5hws_matcher *matcher = rule->matcher; + struct mlx5hws_table *tbl = matcher->tbl; + bool skip_rx, skip_tx; + + dep_wqe->rule = rule; + dep_wqe->user_data = attr->user_data; + dep_wqe->direct_index = mlx5hws_matcher_is_insert_by_idx(matcher) ? + attr->rule_idx : 0; + + if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { + hws_rule_skip(matcher, mt, attr->flow_source, &skip_rx, &skip_tx); + + if (!skip_rx) { + dep_wqe->rtc_0 = matcher->match_ste.rtc_0_id; + dep_wqe->retry_rtc_0 = matcher->col_matcher ? + matcher->col_matcher->match_ste.rtc_0_id : 0; + } else { + dep_wqe->rtc_0 = 0; + dep_wqe->retry_rtc_0 = 0; + } + + if (!skip_tx) { + dep_wqe->rtc_1 = matcher->match_ste.rtc_1_id; + dep_wqe->retry_rtc_1 = matcher->col_matcher ? + matcher->col_matcher->match_ste.rtc_1_id : 0; + } else { + dep_wqe->rtc_1 = 0; + dep_wqe->retry_rtc_1 = 0; + } + } else { + pr_warn("HWS: invalid tbl->type: %d\n", tbl->type); + } +} + +static void hws_rule_move_get_rtc(struct mlx5hws_rule *rule, + struct mlx5hws_send_ste_attr *ste_attr) +{ + struct mlx5hws_matcher *dst_matcher = rule->matcher->resize_dst; + + if (rule->resize_info->rtc_0) { + ste_attr->rtc_0 = dst_matcher->match_ste.rtc_0_id; + ste_attr->retry_rtc_0 = dst_matcher->col_matcher ? 
+				dst_matcher->col_matcher->match_ste.rtc_0_id : 0;
+	}
+	if (rule->resize_info->rtc_1) {
+		ste_attr->rtc_1 = dst_matcher->match_ste.rtc_1_id;
+		ste_attr->retry_rtc_1 = dst_matcher->col_matcher ?
+					dst_matcher->col_matcher->match_ste.rtc_1_id : 0;
+	}
+}
+
+static void hws_rule_gen_comp(struct mlx5hws_send_engine *queue,
+			      struct mlx5hws_rule *rule,
+			      bool err,
+			      void *user_data,
+			      enum mlx5hws_rule_status rule_status_on_succ)
+{
+	enum mlx5hws_flow_op_status comp_status;
+
+	if (!err) {
+		comp_status = MLX5HWS_FLOW_OP_SUCCESS;
+		rule->status = rule_status_on_succ;
+	} else {
+		comp_status = MLX5HWS_FLOW_OP_ERROR;
+		rule->status = MLX5HWS_RULE_STATUS_FAILED;
+	}
+
+	mlx5hws_send_engine_inc_rule(queue);
+	mlx5hws_send_engine_gen_comp(queue, user_data, comp_status);
+}
+
+static void
+hws_rule_save_resize_info(struct mlx5hws_rule *rule,
+			  struct mlx5hws_send_ste_attr *ste_attr,
+			  bool is_update)
+{
+	if (!mlx5hws_matcher_is_resizable(rule->matcher))
+		return;
+
+	if (likely(!is_update)) {
+		rule->resize_info = kzalloc(sizeof(*rule->resize_info), GFP_KERNEL);
+		if (unlikely(!rule->resize_info)) {
+			pr_warn("HWS: resize info isn't allocated for rule\n");
+			return;
+		}
+
+		rule->resize_info->max_stes =
+			rule->matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes;
+		rule->resize_info->action_ste_pool[0] = rule->matcher->action_ste[0].max_stes ?
+							rule->matcher->action_ste[0].pool :
+							NULL;
+		rule->resize_info->action_ste_pool[1] = rule->matcher->action_ste[1].max_stes ?
+							rule->matcher->action_ste[1].pool :
+							NULL;
+	}
+
+	memcpy(rule->resize_info->ctrl_seg, ste_attr->wqe_ctrl,
+	       sizeof(rule->resize_info->ctrl_seg));
+	memcpy(rule->resize_info->data_seg, ste_attr->wqe_data,
+	       sizeof(rule->resize_info->data_seg));
+}
+
+void mlx5hws_rule_clear_resize_info(struct mlx5hws_rule *rule)
+{
+	if (mlx5hws_matcher_is_resizable(rule->matcher) &&
+	    rule->resize_info) {
+		kfree(rule->resize_info);
+		rule->resize_info = NULL;
+	}
+}
+
+static void
+hws_rule_save_delete_info(struct mlx5hws_rule *rule,
+			  struct mlx5hws_send_ste_attr *ste_attr)
+{
+	struct mlx5hws_match_template *mt = rule->matcher->mt;
+	bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(mt);
+
+	if (mlx5hws_matcher_is_resizable(rule->matcher))
+		return;
+
+	if (is_jumbo)
+		memcpy(&rule->tag.jumbo, ste_attr->wqe_data->jumbo, MLX5HWS_JUMBO_TAG_SZ);
+	else
+		memcpy(&rule->tag.match, ste_attr->wqe_data->tag, MLX5HWS_MATCH_TAG_SZ);
+}
+
+static void
+hws_rule_clear_delete_info(struct mlx5hws_rule *rule)
+{
+	/* nothing to do here */
+}
+
+static void
+hws_rule_load_delete_info(struct mlx5hws_rule *rule,
+			  struct mlx5hws_send_ste_attr *ste_attr)
+{
+	if (unlikely(!mlx5hws_matcher_is_resizable(rule->matcher))) {
+		ste_attr->wqe_tag = &rule->tag;
+	} else {
+		struct mlx5hws_wqe_gta_data_seg_ste *data_seg =
+			(struct mlx5hws_wqe_gta_data_seg_ste *)(void *)rule->resize_info->data_seg;
+		struct mlx5hws_rule_match_tag *tag =
+			(struct mlx5hws_rule_match_tag *)(void *)data_seg->action;
+		ste_attr->wqe_tag = tag;
+	}
+}
+
+static int hws_rule_alloc_action_ste_idx(struct mlx5hws_rule *rule,
+					 u8 action_ste_selector)
+{
+	struct mlx5hws_matcher *matcher = rule->matcher;
+	struct mlx5hws_matcher_action_ste *action_ste;
+	struct mlx5hws_pool_chunk ste = {0};
+	int ret;
+
+	action_ste = &matcher->action_ste[action_ste_selector];
+	ste.order = ilog2(roundup_pow_of_two(action_ste->max_stes));
+	ret = mlx5hws_pool_chunk_alloc(action_ste->pool, &ste);
+	if (unlikely(ret)) {
+		mlx5hws_err(matcher->tbl->ctx,
+			    "Failed to allocate STE for rule actions");
+		return ret;
+	}
+	rule->action_ste_idx = ste.offset;
+
+	return 0;
+}
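Because pool chunks are sized in powers of two (chunk->order is a log2 count handed to the buddy allocator), hws_rule_alloc_action_ste_idx() above rounds the per-rule STE count up before taking the log. A worked sketch of that computation, using the kernel's ilog2() and roundup_pow_of_two() helpers; the function name is illustrative:

static inline int action_ste_chunk_order(u8 max_stes)
{
	/* e.g. max_stes = 5: roundup_pow_of_two(5) = 8, ilog2(8) = 3,
	 * so the chunk reserves 2^3 = 8 contiguous STE slots. */
	return ilog2(roundup_pow_of_two(max_stes));
}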
+static void hws_rule_free_action_ste_idx(struct mlx5hws_rule *rule,
+					 u8 action_ste_selector)
+{
+	struct mlx5hws_matcher *matcher = rule->matcher;
+	struct mlx5hws_pool_chunk ste = {0};
+	struct mlx5hws_pool *pool;
+	u8 max_stes;
+
+	if (mlx5hws_matcher_is_resizable(matcher)) {
+		/* Free the original action pool if rule was resized */
+		max_stes = rule->resize_info->max_stes;
+		pool = rule->resize_info->action_ste_pool[action_ste_selector];
+	} else {
+		max_stes = matcher->action_ste[action_ste_selector].max_stes;
+		pool = matcher->action_ste[action_ste_selector].pool;
+	}
+
+	/* This release is safe only when the rule match part was deleted */
+	ste.order = ilog2(roundup_pow_of_two(max_stes));
+	ste.offset = rule->action_ste_idx;
+
+	mlx5hws_pool_chunk_free(pool, &ste);
+}
+
+static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule,
+				     struct mlx5hws_rule_attr *attr)
+{
+	int action_ste_idx;
+	int ret;
+
+	ret = hws_rule_alloc_action_ste_idx(rule, 0);
+	if (unlikely(ret))
+		return ret;
+
+	action_ste_idx = rule->action_ste_idx;
+
+	ret = hws_rule_alloc_action_ste_idx(rule, 1);
+	if (unlikely(ret)) {
+		hws_rule_free_action_ste_idx(rule, 0);
+		return ret;
+	}
+
+	/* Both pools have to return the same index */
+	if (unlikely(rule->action_ste_idx != action_ste_idx)) {
+		pr_warn("HWS: allocation of action STE failed - pool indexes mismatch\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void mlx5hws_rule_free_action_ste(struct mlx5hws_rule *rule)
+{
+	if (rule->action_ste_idx > -1) {
+		hws_rule_free_action_ste_idx(rule, 1);
+		hws_rule_free_action_ste_idx(rule, 0);
+	}
+}
+
+static void hws_rule_create_init(struct mlx5hws_rule *rule,
+				 struct mlx5hws_send_ste_attr *ste_attr,
+				 struct mlx5hws_actions_apply_data *apply,
+				 bool is_update)
+{
+	struct mlx5hws_matcher *matcher = rule->matcher;
+	struct mlx5hws_table *tbl = matcher->tbl;
+	struct mlx5hws_context *ctx = tbl->ctx;
+
+	/* Init rule before reuse */
+	if (!is_update) {
+		/* In update we reuse these RTCs */
+		rule->rtc_0 = 0;
+		rule->rtc_1 = 0;
+		rule->action_ste_selector = 0;
+	} else {
+		rule->action_ste_selector = !rule->action_ste_selector;
+	}
+
+	rule->pending_wqes = 0;
+	rule->action_ste_idx = -1;
+	rule->status = MLX5HWS_RULE_STATUS_CREATING;
+
+	/* Init default send STE attributes */
+	ste_attr->gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE;
+	ste_attr->send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE;
+	ste_attr->send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS;
+	ste_attr->send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA;
+
+	/* Init default action apply */
+	apply->tbl_type = tbl->type;
+	apply->common_res = &ctx->common_res[tbl->type];
+	apply->jump_to_action_stc = matcher->action_ste[0].stc.offset;
+	apply->require_dep = 0;
+}
+
+static void hws_rule_move_init(struct mlx5hws_rule *rule,
+			       struct mlx5hws_rule_attr *attr)
+{
+	/* Save the old RTC IDs to be later used in match STE delete */
+	rule->resize_info->rtc_0 = rule->rtc_0;
+	rule->resize_info->rtc_1 = rule->rtc_1;
+	rule->resize_info->rule_idx = attr->rule_idx;
+
+	rule->rtc_0 = 0;
+	rule->rtc_1 = 0;
+
+	rule->pending_wqes = 0;
+	rule->action_ste_idx = -1;
+	rule->action_ste_selector = 0;
+	rule->status = MLX5HWS_RULE_STATUS_CREATING;
+	rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_WRITING;
+}
+
+bool mlx5hws_rule_move_in_progress(struct mlx5hws_rule *rule)
+{
+	return mlx5hws_matcher_is_in_resize(rule->matcher) &&
+	       rule->resize_info &&
+	       rule->resize_info->state !=
MLX5HWS_RULE_RESIZE_STATE_IDLE; +} + +static int hws_rule_create_hws(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr, + u8 mt_idx, + u32 *match_param, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[]) +{ + struct mlx5hws_action_template *at = &rule->matcher->at[at_idx]; + struct mlx5hws_match_template *mt = &rule->matcher->mt[mt_idx]; + bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); + struct mlx5hws_matcher *matcher = rule->matcher; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + struct mlx5hws_send_ste_attr ste_attr = {0}; + struct mlx5hws_send_ring_dep_wqe *dep_wqe; + struct mlx5hws_actions_wqe_setter *setter; + struct mlx5hws_actions_apply_data apply; + struct mlx5hws_send_engine *queue; + u8 total_stes, action_stes; + bool is_update; + int i, ret; + + is_update = !match_param; + + setter = &at->setters[at->num_of_action_stes]; + total_stes = at->num_of_action_stes + (is_jumbo && !at->only_term); + action_stes = total_stes - 1; + + queue = &ctx->send_queue[attr->queue_id]; + if (unlikely(mlx5hws_send_engine_err(queue))) + return -EIO; + + hws_rule_create_init(rule, &ste_attr, &apply, is_update); + + /* Allocate dependent match WQE since rule might have dependent writes. + * The queued dependent WQE can be later aborted or kept as a dependency. + * dep_wqe buffers (ctrl, data) are also reused for all STE writes. + */ + dep_wqe = mlx5hws_send_add_new_dep_wqe(queue); + hws_rule_init_dep_wqe(dep_wqe, rule, mt, attr); + + ste_attr.wqe_ctrl = &dep_wqe->wqe_ctrl; + ste_attr.wqe_data = &dep_wqe->wqe_data; + apply.wqe_ctrl = &dep_wqe->wqe_ctrl; + apply.wqe_data = (__force __be32 *)&dep_wqe->wqe_data; + apply.rule_action = rule_actions; + apply.queue = queue; + + if (action_stes) { + /* Allocate action STEs for rules that need more than match STE */ + if (!is_update) { + ret = hws_rule_alloc_action_ste(rule, attr); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate action memory %d", ret); + mlx5hws_send_abort_new_dep_wqe(queue); + return ret; + } + } + /* Skip RX/TX based on the dep_wqe init */ + ste_attr.rtc_0 = dep_wqe->rtc_0 ? + matcher->action_ste[rule->action_ste_selector].rtc_0_id : 0; + ste_attr.rtc_1 = dep_wqe->rtc_1 ? + matcher->action_ste[rule->action_ste_selector].rtc_1_id : 0; + /* Action STEs are written to a specific index last to first */ + ste_attr.direct_index = rule->action_ste_idx + action_stes; + apply.next_direct_idx = ste_attr.direct_index; + } else { + apply.next_direct_idx = 0; + } + + for (i = total_stes; i-- > 0;) { + mlx5hws_action_apply_setter(&apply, setter--, !i && is_jumbo); + + if (i == 0) { + /* Handle last match STE. + * For hash split / linear lookup RTCs, packets reaching any STE + * will always match and perform the specified actions, which + * makes the tag irrelevant. 
+ */ + if (likely(!mlx5hws_matcher_is_insert_by_idx(matcher) && !is_update)) + mlx5hws_definer_create_tag(match_param, mt->fc, mt->fc_sz, + (u8 *)dep_wqe->wqe_data.action); + else if (is_update) + hws_rule_update_copy_tag(rule, &dep_wqe->wqe_data, is_jumbo); + + /* Rule has dependent WQEs, match dep_wqe is queued */ + if (action_stes || apply.require_dep) + break; + + /* Rule has no dependencies, abort dep_wqe and send WQE now */ + mlx5hws_send_abort_new_dep_wqe(queue); + ste_attr.wqe_tag_is_jumbo = is_jumbo; + ste_attr.send_attr.notify_hw = !attr->burst; + ste_attr.send_attr.user_data = dep_wqe->user_data; + ste_attr.send_attr.rule = dep_wqe->rule; + ste_attr.rtc_0 = dep_wqe->rtc_0; + ste_attr.rtc_1 = dep_wqe->rtc_1; + ste_attr.used_id_rtc_0 = &rule->rtc_0; + ste_attr.used_id_rtc_1 = &rule->rtc_1; + ste_attr.retry_rtc_0 = dep_wqe->retry_rtc_0; + ste_attr.retry_rtc_1 = dep_wqe->retry_rtc_1; + ste_attr.direct_index = dep_wqe->direct_index; + } else { + apply.next_direct_idx = --ste_attr.direct_index; + } + + mlx5hws_send_ste(queue, &ste_attr); + } + + /* Backup TAG on the rule for deletion and resize info for + * moving rules to a new matcher, only after insertion. + */ + if (!is_update) + hws_rule_save_delete_info(rule, &ste_attr); + + hws_rule_save_resize_info(rule, &ste_attr, is_update); + mlx5hws_send_engine_inc_rule(queue); + + if (!attr->burst) + mlx5hws_send_all_dep_wqe(queue); + + return 0; +} + +static void hws_rule_destroy_failed_hws(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + struct mlx5hws_context *ctx = rule->matcher->tbl->ctx; + struct mlx5hws_send_engine *queue; + + queue = &ctx->send_queue[attr->queue_id]; + + hws_rule_gen_comp(queue, rule, false, + attr->user_data, MLX5HWS_RULE_STATUS_DELETED); + + /* Rule failed now we can safely release action STEs */ + mlx5hws_rule_free_action_ste(rule); + + /* Clear complex tag */ + hws_rule_clear_delete_info(rule); + + /* Clear info that was saved for resizing */ + mlx5hws_rule_clear_resize_info(rule); + + /* If a rule that was indicated as burst (need to trigger HW) has failed + * insertion we won't ring the HW as nothing is being written to the WQ. + * In such case update the last WQE and ring the HW with that work + */ + if (attr->burst) + return; + + mlx5hws_send_all_dep_wqe(queue); + mlx5hws_send_engine_flush_queue(queue); +} + +static int hws_rule_destroy_hws(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(rule->matcher->mt); + struct mlx5hws_context *ctx = rule->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = rule->matcher; + struct mlx5hws_wqe_gta_ctrl_seg wqe_ctrl = {0}; + struct mlx5hws_send_ste_attr ste_attr = {0}; + struct mlx5hws_send_engine *queue; + + queue = &ctx->send_queue[attr->queue_id]; + + if (unlikely(mlx5hws_send_engine_err(queue))) { + hws_rule_destroy_failed_hws(rule, attr); + return 0; + } + + /* Rule is not completed yet */ + if (rule->status == MLX5HWS_RULE_STATUS_CREATING) + return -EBUSY; + + /* Rule failed and doesn't require cleanup */ + if (rule->status == MLX5HWS_RULE_STATUS_FAILED) { + hws_rule_destroy_failed_hws(rule, attr); + return 0; + } + + if (rule->skip_delete) { + /* Rule shouldn't be deleted in HW. + * Generate completion as if write succeeded, and we can + * safely release action STEs and clear resize info. 
+ */ + hws_rule_gen_comp(queue, rule, false, + attr->user_data, MLX5HWS_RULE_STATUS_DELETED); + + mlx5hws_rule_free_action_ste(rule); + mlx5hws_rule_clear_resize_info(rule); + return 0; + } + + mlx5hws_send_engine_inc_rule(queue); + + /* Send dependent WQE */ + if (!attr->burst) + mlx5hws_send_all_dep_wqe(queue); + + rule->status = MLX5HWS_RULE_STATUS_DELETING; + + ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE; + ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS; + ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA; + + ste_attr.send_attr.rule = rule; + ste_attr.send_attr.notify_hw = !attr->burst; + ste_attr.send_attr.user_data = attr->user_data; + + ste_attr.rtc_0 = rule->rtc_0; + ste_attr.rtc_1 = rule->rtc_1; + ste_attr.used_id_rtc_0 = &rule->rtc_0; + ste_attr.used_id_rtc_1 = &rule->rtc_1; + ste_attr.wqe_ctrl = &wqe_ctrl; + ste_attr.wqe_tag_is_jumbo = is_jumbo; + ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_DEACTIVATE; + if (unlikely(mlx5hws_matcher_is_insert_by_idx(matcher))) + ste_attr.direct_index = attr->rule_idx; + + hws_rule_load_delete_info(rule, &ste_attr); + mlx5hws_send_ste(queue, &ste_attr); + hws_rule_clear_delete_info(rule); + + return 0; +} + +static int hws_rule_enqueue_precheck(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + struct mlx5hws_context *ctx = rule->matcher->tbl->ctx; + + if (unlikely(!attr->user_data)) + return -EINVAL; + + /* Check if there is room in queue */ + if (unlikely(mlx5hws_send_engine_full(&ctx->send_queue[attr->queue_id]))) + return -EBUSY; + + return 0; +} + +static int hws_rule_enqueue_precheck_move(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + if (unlikely(rule->status != MLX5HWS_RULE_STATUS_CREATED)) + return -EINVAL; + + return hws_rule_enqueue_precheck(rule, attr); +} + +static int hws_rule_enqueue_precheck_create(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + if (unlikely(mlx5hws_matcher_is_in_resize(rule->matcher))) + /* Matcher in resize - new rules are not allowed */ + return -EAGAIN; + + return hws_rule_enqueue_precheck(rule, attr); +} + +static int hws_rule_enqueue_precheck_update(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + struct mlx5hws_matcher *matcher = rule->matcher; + + if (unlikely(!mlx5hws_matcher_is_resizable(rule->matcher) && + !matcher->attr.optimize_using_rule_idx && + !mlx5hws_matcher_is_insert_by_idx(matcher))) { + return -EOPNOTSUPP; + } + + if (unlikely(rule->status != MLX5HWS_RULE_STATUS_CREATED)) + return -EBUSY; + + return hws_rule_enqueue_precheck_create(rule, attr); +} + +int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule, + void *queue_ptr, + void *user_data) +{ + bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(rule->matcher->mt); + struct mlx5hws_wqe_gta_ctrl_seg empty_wqe_ctrl = {0}; + struct mlx5hws_matcher *matcher = rule->matcher; + struct mlx5hws_send_engine *queue = queue_ptr; + struct mlx5hws_send_ste_attr ste_attr = {0}; + + mlx5hws_send_all_dep_wqe(queue); + + rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_DELETING; + + ste_attr.send_attr.fence = 0; + ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE; + ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS; + ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA; + ste_attr.send_attr.rule = rule; + ste_attr.send_attr.notify_hw = 1; + ste_attr.send_attr.user_data = user_data; + ste_attr.rtc_0 = rule->resize_info->rtc_0; + ste_attr.rtc_1 = rule->resize_info->rtc_1; + ste_attr.used_id_rtc_0 = 
&rule->resize_info->rtc_0; + ste_attr.used_id_rtc_1 = &rule->resize_info->rtc_1; + ste_attr.wqe_ctrl = &empty_wqe_ctrl; + ste_attr.wqe_tag_is_jumbo = is_jumbo; + ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_DEACTIVATE; + + if (unlikely(mlx5hws_matcher_is_insert_by_idx(matcher))) + ste_attr.direct_index = rule->resize_info->rule_idx; + + hws_rule_load_delete_info(rule, &ste_attr); + mlx5hws_send_ste(queue, &ste_attr); + + return 0; +} + +int mlx5hws_rule_move_hws_add(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(rule->matcher->mt); + struct mlx5hws_context *ctx = rule->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = rule->matcher; + struct mlx5hws_send_ste_attr ste_attr = {0}; + struct mlx5hws_send_engine *queue; + int ret; + + ret = hws_rule_enqueue_precheck_move(rule, attr); + if (unlikely(ret)) + return ret; + + queue = &ctx->send_queue[attr->queue_id]; + + ret = mlx5hws_send_engine_err(queue); + if (ret) + return ret; + + hws_rule_move_init(rule, attr); + hws_rule_move_get_rtc(rule, &ste_attr); + + ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE; + ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS; + ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA; + ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE; + ste_attr.wqe_tag_is_jumbo = is_jumbo; + + ste_attr.send_attr.rule = rule; + ste_attr.send_attr.fence = 0; + ste_attr.send_attr.notify_hw = !attr->burst; + ste_attr.send_attr.user_data = attr->user_data; + + ste_attr.used_id_rtc_0 = &rule->rtc_0; + ste_attr.used_id_rtc_1 = &rule->rtc_1; + ste_attr.wqe_ctrl = (struct mlx5hws_wqe_gta_ctrl_seg *)rule->resize_info->ctrl_seg; + ste_attr.wqe_data = (struct mlx5hws_wqe_gta_data_seg_ste *)rule->resize_info->data_seg; + ste_attr.direct_index = mlx5hws_matcher_is_insert_by_idx(matcher) ? 
+ attr->rule_idx : 0; + + mlx5hws_send_ste(queue, &ste_attr); + mlx5hws_send_engine_inc_rule(queue); + + if (!attr->burst) + mlx5hws_send_all_dep_wqe(queue); + + return 0; +} + +int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, + u8 mt_idx, + u32 *match_param, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[], + struct mlx5hws_rule_attr *attr, + struct mlx5hws_rule *rule_handle) +{ + int ret; + + rule_handle->matcher = matcher; + + ret = hws_rule_enqueue_precheck_create(rule_handle, attr); + if (unlikely(ret)) + return ret; + + if (unlikely(!(matcher->num_of_mt >= mt_idx) || + !(matcher->num_of_at >= at_idx) || + !match_param)) { + pr_warn("HWS: Invalid rule creation parameters (MTs, ATs or match params)\n"); + return -EINVAL; + } + + ret = hws_rule_create_hws(rule_handle, + attr, + mt_idx, + match_param, + at_idx, + rule_actions); + + return ret; +} + +int mlx5hws_rule_destroy(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr) +{ + int ret; + + ret = hws_rule_enqueue_precheck(rule, attr); + if (unlikely(ret)) + return ret; + + ret = hws_rule_destroy_hws(rule, attr); + + return ret; +} + +int mlx5hws_rule_action_update(struct mlx5hws_rule *rule, + u8 at_idx, + struct mlx5hws_rule_action rule_actions[], + struct mlx5hws_rule_attr *attr) +{ + int ret; + + ret = hws_rule_enqueue_precheck_update(rule, attr); + if (unlikely(ret)) + return ret; + + ret = hws_rule_create_hws(rule, + attr, + 0, + NULL, + at_idx, + rule_actions); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_rule.h new file mode 100644 index 000000000000..495cdd17e9f3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_rule.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_RULE_H_ +#define MLX5HWS_RULE_H_ + +enum { + MLX5HWS_STE_CTRL_SZ = 20, + MLX5HWS_ACTIONS_SZ = 12, + MLX5HWS_MATCH_TAG_SZ = 32, + MLX5HWS_JUMBO_TAG_SZ = 44, +}; + +enum mlx5hws_rule_status { + MLX5HWS_RULE_STATUS_UNKNOWN, + MLX5HWS_RULE_STATUS_CREATING, + MLX5HWS_RULE_STATUS_CREATED, + MLX5HWS_RULE_STATUS_DELETING, + MLX5HWS_RULE_STATUS_DELETED, + MLX5HWS_RULE_STATUS_FAILING, + MLX5HWS_RULE_STATUS_FAILED, +}; + +enum mlx5hws_rule_move_state { + MLX5HWS_RULE_RESIZE_STATE_IDLE, + MLX5HWS_RULE_RESIZE_STATE_WRITING, + MLX5HWS_RULE_RESIZE_STATE_DELETING, +}; + +enum mlx5hws_rule_jumbo_match_tag_offset { + MLX5HWS_RULE_JUMBO_MATCH_TAG_OFFSET_DW0 = 8, +}; + +struct mlx5hws_rule_match_tag { + union { + u8 jumbo[MLX5HWS_JUMBO_TAG_SZ]; + struct { + u8 reserved[MLX5HWS_ACTIONS_SZ]; + u8 match[MLX5HWS_MATCH_TAG_SZ]; + }; + }; +}; + +struct mlx5hws_rule_resize_info { + struct mlx5hws_pool *action_ste_pool[2]; + u32 rtc_0; + u32 rtc_1; + u32 rule_idx; + u8 state; + u8 max_stes; + u8 ctrl_seg[MLX5HWS_WQE_SZ_GTA_CTRL]; /* Ctrl segment of STE: 48 bytes */ + u8 data_seg[MLX5HWS_WQE_SZ_GTA_DATA]; /* Data segment of STE: 64 bytes */ +}; + +struct mlx5hws_rule { + struct mlx5hws_matcher *matcher; + union { + struct mlx5hws_rule_match_tag tag; + struct mlx5hws_rule_resize_info *resize_info; + }; + u32 rtc_0; /* The RTC into which the STE was inserted */ + u32 rtc_1; /* The RTC into which the STE was inserted */ + int action_ste_idx; /* STE array index */ + u8 status; /* enum mlx5hws_rule_status */ + u8 action_ste_selector; /* For rule update - which action STE is in use */ + u8 pending_wqes; + bool skip_delete; /* For 
complex rules - another rule with same tag + * still exists, so don't actually delete this rule. + */ +}; + +void mlx5hws_rule_free_action_ste(struct mlx5hws_rule *rule); + +int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule, + void *queue, void *user_data); + +int mlx5hws_rule_move_hws_add(struct mlx5hws_rule *rule, + struct mlx5hws_rule_attr *attr); + +bool mlx5hws_rule_move_in_progress(struct mlx5hws_rule *rule); + +void mlx5hws_rule_clear_resize_info(struct mlx5hws_rule *rule); + +#endif /* MLX5HWS_RULE_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c new file mode 100644 index 000000000000..a1adbb48735c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.c @@ -0,0 +1,1209 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" +#include "lib/clock.h" + +enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; + +struct mlx5hws_send_ring_dep_wqe * +mlx5hws_send_add_new_dep_wqe(struct mlx5hws_send_engine *queue) +{ + struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq; + unsigned int idx = send_sq->head_dep_idx++ & (queue->num_entries - 1); + + memset(&send_sq->dep_wqe[idx].wqe_data.tag, 0, MLX5HWS_MATCH_TAG_SZ); + + return &send_sq->dep_wqe[idx]; +} + +void mlx5hws_send_abort_new_dep_wqe(struct mlx5hws_send_engine *queue) +{ + queue->send_ring.send_sq.head_dep_idx--; +} + +void mlx5hws_send_all_dep_wqe(struct mlx5hws_send_engine *queue) +{ + struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq; + struct mlx5hws_send_ste_attr ste_attr = {0}; + struct mlx5hws_send_ring_dep_wqe *dep_wqe; + + ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE; + ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS; + ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA; + ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE; + + /* Fence first from previous depend WQEs */ + ste_attr.send_attr.fence = 1; + + while (send_sq->head_dep_idx != send_sq->tail_dep_idx) { + dep_wqe = &send_sq->dep_wqe[send_sq->tail_dep_idx++ & (queue->num_entries - 1)]; + + /* Notify HW on the last WQE */ + ste_attr.send_attr.notify_hw = (send_sq->tail_dep_idx == send_sq->head_dep_idx); + ste_attr.send_attr.user_data = dep_wqe->user_data; + ste_attr.send_attr.rule = dep_wqe->rule; + + ste_attr.rtc_0 = dep_wqe->rtc_0; + ste_attr.rtc_1 = dep_wqe->rtc_1; + ste_attr.retry_rtc_0 = dep_wqe->retry_rtc_0; + ste_attr.retry_rtc_1 = dep_wqe->retry_rtc_1; + ste_attr.used_id_rtc_0 = &dep_wqe->rule->rtc_0; + ste_attr.used_id_rtc_1 = &dep_wqe->rule->rtc_1; + ste_attr.wqe_ctrl = &dep_wqe->wqe_ctrl; + ste_attr.wqe_data = &dep_wqe->wqe_data; + ste_attr.direct_index = dep_wqe->direct_index; + + mlx5hws_send_ste(queue, &ste_attr); + + /* Fencing is done only on the first WQE */ + ste_attr.send_attr.fence = 0; + } +} + +struct mlx5hws_send_engine_post_ctrl +mlx5hws_send_engine_post_start(struct mlx5hws_send_engine *queue) +{ + struct mlx5hws_send_engine_post_ctrl ctrl; + + ctrl.queue = queue; + /* Currently only one send ring is supported */ + ctrl.send_ring = &queue->send_ring; + ctrl.num_wqebbs = 0; + + return ctrl; +} + +void mlx5hws_send_engine_post_req_wqe(struct mlx5hws_send_engine_post_ctrl *ctrl, + char **buf, size_t *len) +{ + struct mlx5hws_send_ring_sq *send_sq = &ctrl->send_ring->send_sq; + unsigned int idx; + + idx = (send_sq->cur_post + ctrl->num_wqebbs) & 
send_sq->buf_mask; + + /* Note that *buf is a single MLX5_SEND_WQE_BB. It cannot be used + * as buffer of more than one WQE_BB, since the two MLX5_SEND_WQE_BB + * can be on 2 different kernel memory pages. + */ + *buf = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx); + *len = MLX5_SEND_WQE_BB; + + if (!ctrl->num_wqebbs) { + *buf += sizeof(struct mlx5hws_wqe_ctrl_seg); + *len -= sizeof(struct mlx5hws_wqe_ctrl_seg); + } + + ctrl->num_wqebbs++; +} + +static void hws_send_engine_post_ring(struct mlx5hws_send_ring_sq *sq, + struct mlx5hws_wqe_ctrl_seg *doorbell_cseg) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *sq->wq.db = cpu_to_be32(sq->cur_post); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)doorbell_cseg, sq->uar_map); + + /* Ensure doorbell is written on uar_page before poll_cq */ + WRITE_ONCE(doorbell_cseg, NULL); +} + +static void +hws_send_wqe_set_tag(struct mlx5hws_wqe_gta_data_seg_ste *wqe_data, + struct mlx5hws_rule_match_tag *tag, + bool is_jumbo) +{ + if (is_jumbo) { + /* Clear previous possibly dirty control */ + memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ); + memcpy(wqe_data->jumbo, tag->jumbo, MLX5HWS_JUMBO_TAG_SZ); + } else { + /* Clear previous possibly dirty control and actions */ + memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ + MLX5HWS_ACTIONS_SZ); + memcpy(wqe_data->tag, tag->match, MLX5HWS_MATCH_TAG_SZ); + } +} + +void mlx5hws_send_engine_post_end(struct mlx5hws_send_engine_post_ctrl *ctrl, + struct mlx5hws_send_engine_post_attr *attr) +{ + struct mlx5hws_wqe_ctrl_seg *wqe_ctrl; + struct mlx5hws_send_ring_sq *sq; + unsigned int idx; + u32 flags = 0; + + sq = &ctrl->send_ring->send_sq; + idx = sq->cur_post & sq->buf_mask; + sq->last_idx = idx; + + wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, idx); + + wqe_ctrl->opmod_idx_opcode = + cpu_to_be32((attr->opmod << 24) | + ((sq->cur_post & 0xffff) << 8) | + attr->opcode); + wqe_ctrl->qpn_ds = + cpu_to_be32((attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16 | + sq->sqn << 8); + wqe_ctrl->imm = cpu_to_be32(attr->id); + + flags |= attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0; + flags |= attr->fence ? 
MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE : 0; + wqe_ctrl->flags = cpu_to_be32(flags); + + sq->wr_priv[idx].id = attr->id; + sq->wr_priv[idx].retry_id = attr->retry_id; + + sq->wr_priv[idx].rule = attr->rule; + sq->wr_priv[idx].user_data = attr->user_data; + sq->wr_priv[idx].num_wqebbs = ctrl->num_wqebbs; + + if (attr->rule) { + sq->wr_priv[idx].rule->pending_wqes++; + sq->wr_priv[idx].used_id = attr->used_id; + } + + sq->cur_post += ctrl->num_wqebbs; + + if (attr->notify_hw) + hws_send_engine_post_ring(sq, wqe_ctrl); +} + +static void hws_send_wqe(struct mlx5hws_send_engine *queue, + struct mlx5hws_send_engine_post_attr *send_attr, + struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl, + void *send_wqe_data, + void *send_wqe_tag, + bool is_jumbo, + u8 gta_opcode, + u32 direct_index) +{ + struct mlx5hws_wqe_gta_data_seg_ste *wqe_data; + struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl; + struct mlx5hws_send_engine_post_ctrl ctrl; + size_t wqe_len; + + ctrl = mlx5hws_send_engine_post_start(queue); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len); + mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len); + + wqe_ctrl->op_dirix = cpu_to_be32(gta_opcode << 28 | direct_index); + memcpy(wqe_ctrl->stc_ix, send_wqe_ctrl->stc_ix, + sizeof(send_wqe_ctrl->stc_ix)); + + if (send_wqe_data) + memcpy(wqe_data, send_wqe_data, sizeof(*wqe_data)); + else + hws_send_wqe_set_tag(wqe_data, send_wqe_tag, is_jumbo); + + mlx5hws_send_engine_post_end(&ctrl, send_attr); +} + +void mlx5hws_send_ste(struct mlx5hws_send_engine *queue, + struct mlx5hws_send_ste_attr *ste_attr) +{ + struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr; + u8 notify_hw = send_attr->notify_hw; + u8 fence = send_attr->fence; + + if (ste_attr->rtc_1) { + send_attr->id = ste_attr->rtc_1; + send_attr->used_id = ste_attr->used_id_rtc_1; + send_attr->retry_id = ste_attr->retry_rtc_1; + send_attr->fence = fence; + send_attr->notify_hw = notify_hw && !ste_attr->rtc_0; + hws_send_wqe(queue, send_attr, + ste_attr->wqe_ctrl, + ste_attr->wqe_data, + ste_attr->wqe_tag, + ste_attr->wqe_tag_is_jumbo, + ste_attr->gta_opcode, + ste_attr->direct_index); + } + + if (ste_attr->rtc_0) { + send_attr->id = ste_attr->rtc_0; + send_attr->used_id = ste_attr->used_id_rtc_0; + send_attr->retry_id = ste_attr->retry_rtc_0; + send_attr->fence = fence && !ste_attr->rtc_1; + send_attr->notify_hw = notify_hw; + hws_send_wqe(queue, send_attr, + ste_attr->wqe_ctrl, + ste_attr->wqe_data, + ste_attr->wqe_tag, + ste_attr->wqe_tag_is_jumbo, + ste_attr->gta_opcode, + ste_attr->direct_index); + } + + /* Restore to original requested values */ + send_attr->notify_hw = notify_hw; + send_attr->fence = fence; +} + +static void hws_send_engine_retry_post_send(struct mlx5hws_send_engine *queue, + struct mlx5hws_send_ring_priv *priv, + u16 wqe_cnt) +{ + struct mlx5hws_send_engine_post_attr send_attr = {0}; + struct mlx5hws_wqe_gta_data_seg_ste *wqe_data; + struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl; + struct mlx5hws_send_engine_post_ctrl ctrl; + struct mlx5hws_send_ring_sq *send_sq; + unsigned int idx; + size_t wqe_len; + char *p; + + send_attr.rule = priv->rule; + send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS; + send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE; + send_attr.len = MLX5_SEND_WQE_BB * 2 - sizeof(struct mlx5hws_wqe_ctrl_seg); + send_attr.notify_hw = 1; + send_attr.fence = 0; + send_attr.user_data = priv->user_data; + send_attr.id = priv->retry_id; + send_attr.used_id = priv->used_id; + + ctrl = mlx5hws_send_engine_post_start(queue); + 
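+	/* The post_start/post_req_wqe/post_end calls used here form the
+	 * generic WQE posting protocol of this file. A minimal sketch of
+	 * posting one two-WQEBB GTA operation (queue and attr assumed to be
+	 * set up as elsewhere in this file):
+	 *
+	 *	struct mlx5hws_send_engine_post_ctrl ctrl;
+	 *	char *buf;
+	 *	size_t len;
+	 *
+	 *	ctrl = mlx5hws_send_engine_post_start(queue);
+	 *	mlx5hws_send_engine_post_req_wqe(&ctrl, &buf, &len);
+	 *	... fill buf (GTA ctrl segment; WQE ctrl space is reserved) ...
+	 *	mlx5hws_send_engine_post_req_wqe(&ctrl, &buf, &len);
+	 *	... fill buf (GTA data segment) ...
+	 *	mlx5hws_send_engine_post_end(&ctrl, &attr);
+	 *
+	 * post_end() then writes the WQE ctrl segment and, when
+	 * attr.notify_hw is set, rings the doorbell.
+	 */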
mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
+	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);
+
+	send_sq = &ctrl.send_ring->send_sq;
+	idx = wqe_cnt & send_sq->buf_mask;
+	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);
+
+	/* Copy old gta ctrl */
+	memcpy(wqe_ctrl, p + sizeof(struct mlx5hws_wqe_ctrl_seg),
+	       MLX5_SEND_WQE_BB - sizeof(struct mlx5hws_wqe_ctrl_seg));
+
+	idx = (wqe_cnt + 1) & send_sq->buf_mask;
+	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);
+
+	/* Copy old gta data */
+	memcpy(wqe_data, p, MLX5_SEND_WQE_BB);
+
+	mlx5hws_send_engine_post_end(&ctrl, &send_attr);
+}
+
+void mlx5hws_send_engine_flush_queue(struct mlx5hws_send_engine *queue)
+{
+	struct mlx5hws_send_ring_sq *sq = &queue->send_ring.send_sq;
+	struct mlx5hws_wqe_ctrl_seg *wqe_ctrl;
+
+	wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, sq->last_idx);
+	wqe_ctrl->flags |= cpu_to_be32(MLX5_WQE_CTRL_CQ_UPDATE);
+
+	hws_send_engine_post_ring(sq, wqe_ctrl);
+}
+
+static void
+hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue,
+				   struct mlx5hws_send_ring_priv *priv,
+				   enum mlx5hws_flow_op_status *status)
+{
+	switch (priv->rule->resize_info->state) {
+	case MLX5HWS_RULE_RESIZE_STATE_WRITING:
+		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
+			/* Back up the original RTCs */
+			u32 orig_rtc_0 = priv->rule->resize_info->rtc_0;
+			u32 orig_rtc_1 = priv->rule->resize_info->rtc_1;
+
+			/* Delete the partially failed move rule using resize_info */
+			priv->rule->resize_info->rtc_0 = priv->rule->rtc_0;
+			priv->rule->resize_info->rtc_1 = priv->rule->rtc_1;
+
+			/* Move the rule to the original RTC for future delete */
+			priv->rule->rtc_0 = orig_rtc_0;
+			priv->rule->rtc_1 = orig_rtc_1;
+		}
+		/* Clean leftovers */
+		mlx5hws_rule_move_hws_remove(priv->rule, queue, priv->user_data);
+		break;
+
+	case MLX5HWS_RULE_RESIZE_STATE_DELETING:
+		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
+			*status = MLX5HWS_FLOW_OP_ERROR;
+		} else {
+			*status = MLX5HWS_FLOW_OP_SUCCESS;
+			priv->rule->matcher = priv->rule->matcher->resize_dst;
+		}
+		priv->rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_IDLE;
+		priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue,
+					struct mlx5hws_send_ring_priv *priv,
+					u16 wqe_cnt,
+					enum mlx5hws_flow_op_status *status)
+{
+	priv->rule->pending_wqes--;
+
+	if (*status == MLX5HWS_FLOW_OP_ERROR) {
+		if (priv->retry_id) {
+			hws_send_engine_retry_post_send(queue, priv, wqe_cnt);
+			return;
+		}
+		/* Some part of the rule failed */
+		priv->rule->status = MLX5HWS_RULE_STATUS_FAILING;
+		*priv->used_id = 0;
+	} else {
+		*priv->used_id = priv->id;
+	}
+
+	/* Update rule status for the last completion */
+	if (!priv->rule->pending_wqes) {
+		if (unlikely(mlx5hws_rule_move_in_progress(priv->rule))) {
+			hws_send_engine_update_rule_resize(queue, priv, status);
+			return;
+		}
+
+		if (unlikely(priv->rule->status == MLX5HWS_RULE_STATUS_FAILING)) {
+			/* Rule completely failed and doesn't require cleanup */
+			if (!priv->rule->rtc_0 && !priv->rule->rtc_1)
+				priv->rule->status = MLX5HWS_RULE_STATUS_FAILED;
+
+			*status = MLX5HWS_FLOW_OP_ERROR;
+		} else {
+			/* Increase the status; this only works on a good flow,
+			 * as the enum is arranged in order:
+			 * creating -> created -> deleting -> deleted
+			 */
+			priv->rule->status++;
+			*status = MLX5HWS_FLOW_OP_SUCCESS;
+			/* The rule was deleted; now we can safely release the
+			 * action STEs and clear the resize info
+			 */
+			if (priv->rule->status ==
MLX5HWS_RULE_STATUS_DELETED) { + mlx5hws_rule_free_action_ste(priv->rule); + mlx5hws_rule_clear_resize_info(priv->rule); + } + } + } +} + +static void hws_send_engine_update(struct mlx5hws_send_engine *queue, + struct mlx5_cqe64 *cqe, + struct mlx5hws_send_ring_priv *priv, + struct mlx5hws_flow_op_result res[], + s64 *i, + u32 res_nb, + u16 wqe_cnt) +{ + enum mlx5hws_flow_op_status status; + + if (!cqe || (likely(be32_to_cpu(cqe->byte_cnt) >> 31 == 0) && + likely(get_cqe_opcode(cqe) == MLX5_CQE_REQ))) { + status = MLX5HWS_FLOW_OP_SUCCESS; + } else { + status = MLX5HWS_FLOW_OP_ERROR; + } + + if (priv->user_data) { + if (priv->rule) { + hws_send_engine_update_rule(queue, priv, wqe_cnt, &status); + /* Completion is provided on the last rule WQE */ + if (priv->rule->pending_wqes) + return; + } + + if (*i < res_nb) { + res[*i].user_data = priv->user_data; + res[*i].status = status; + (*i)++; + mlx5hws_send_engine_dec_rule(queue); + } else { + mlx5hws_send_engine_gen_comp(queue, priv->user_data, status); + } + } +} + +static int mlx5hws_parse_cqe(struct mlx5hws_send_ring_cq *cq, + struct mlx5_cqe64 *cqe64) +{ + if (unlikely(get_cqe_opcode(cqe64) != MLX5_CQE_REQ)) { + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe64; + + mlx5_core_err(cq->mdev, "Bad OP in HWS SQ CQE: 0x%x\n", get_cqe_opcode(cqe64)); + mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n", err_cqe->vendor_err_synd); + mlx5_core_err(cq->mdev, "syndrome=%x\n", err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, + 16, 1, err_cqe, + sizeof(*err_cqe), false); + return CQ_POLL_ERR; + } + + return CQ_OK; +} + +static int mlx5hws_cq_poll_one(struct mlx5hws_send_ring_cq *cq) +{ + struct mlx5_cqe64 *cqe64; + int err; + + cqe64 = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe64) { + if (unlikely(cq->mdev->state == + MLX5_DEVICE_STATE_INTERNAL_ERROR)) { + mlx5_core_dbg_once(cq->mdev, + "Polling CQ while device is shutting down\n"); + return CQ_POLL_ERR; + } + return CQ_EMPTY; + } + + mlx5_cqwq_pop(&cq->wq); + err = mlx5hws_parse_cqe(cq, cqe64); + mlx5_cqwq_update_db_record(&cq->wq); + + return err; +} + +static void hws_send_engine_poll_cq(struct mlx5hws_send_engine *queue, + struct mlx5hws_flow_op_result res[], + s64 *polled, + u32 res_nb) +{ + struct mlx5hws_send_ring *send_ring = &queue->send_ring; + struct mlx5hws_send_ring_cq *cq = &send_ring->send_cq; + struct mlx5hws_send_ring_sq *sq = &send_ring->send_sq; + struct mlx5hws_send_ring_priv *priv; + struct mlx5_cqe64 *cqe; + u8 cqe_opcode; + u16 wqe_cnt; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return; + + cqe_opcode = get_cqe_opcode(cqe); + if (cqe_opcode == MLX5_CQE_INVALID) + return; + + if (unlikely(cqe_opcode != MLX5_CQE_REQ)) + queue->err = true; + + wqe_cnt = be16_to_cpu(cqe->wqe_counter) & sq->buf_mask; + + while (cq->poll_wqe != wqe_cnt) { + priv = &sq->wr_priv[cq->poll_wqe]; + hws_send_engine_update(queue, NULL, priv, res, polled, res_nb, 0); + cq->poll_wqe = (cq->poll_wqe + priv->num_wqebbs) & sq->buf_mask; + } + + priv = &sq->wr_priv[wqe_cnt]; + cq->poll_wqe = (wqe_cnt + priv->num_wqebbs) & sq->buf_mask; + hws_send_engine_update(queue, cqe, priv, res, polled, res_nb, wqe_cnt); + mlx5hws_cq_poll_one(cq); +} + +static void hws_send_engine_poll_list(struct mlx5hws_send_engine *queue, + struct mlx5hws_flow_op_result res[], + s64 *polled, + u32 res_nb) +{ + struct mlx5hws_completed_poll *comp = &queue->completed; + + while (comp->ci != comp->pi) { + if (*polled < res_nb) { + res[*polled].status = + comp->entries[comp->ci].status; + 
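+			/* A minimal sketch, from the caller's side, of draining
+			 * completions through mlx5hws_send_queue_poll(), which
+			 * wraps this poll path (complete_user_op() is a
+			 * hypothetical caller callback):
+			 *
+			 *	struct mlx5hws_flow_op_result res[32];
+			 *	int i, n;
+			 *
+			 *	do {
+			 *		n = mlx5hws_send_queue_poll(ctx, queue_id,
+			 *					    res, ARRAY_SIZE(res));
+			 *		for (i = 0; i < n; i++)
+			 *			complete_user_op(res[i].user_data,
+			 *					 res[i].status);
+			 *	} while (n == ARRAY_SIZE(res));
+			 */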
res[*polled].user_data =
+				comp->entries[comp->ci].user_data;
+			(*polled)++;
+			comp->ci = (comp->ci + 1) & comp->mask;
+			mlx5hws_send_engine_dec_rule(queue);
+		} else {
+			return;
+		}
+	}
+}
+
+static int hws_send_engine_poll(struct mlx5hws_send_engine *queue,
+				struct mlx5hws_flow_op_result res[],
+				u32 res_nb)
+{
+	s64 polled = 0;
+
+	hws_send_engine_poll_list(queue, res, &polled, res_nb);
+
+	if (polled >= res_nb)
+		return polled;
+
+	hws_send_engine_poll_cq(queue, res, &polled, res_nb);
+
+	return polled;
+}
+
+int mlx5hws_send_queue_poll(struct mlx5hws_context *ctx,
+			    u16 queue_id,
+			    struct mlx5hws_flow_op_result res[],
+			    u32 res_nb)
+{
+	return hws_send_engine_poll(&ctx->send_queue[queue_id], res, res_nb);
+}
+
+static int hws_send_ring_alloc_sq(struct mlx5_core_dev *mdev,
+				  int numa_node,
+				  struct mlx5hws_send_engine *queue,
+				  struct mlx5hws_send_ring_sq *sq,
+				  void *sqc_data)
+{
+	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct mlx5_wq_param param;
+	size_t buf_sz;
+	int err;
+
+	sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+	sq->mdev = mdev;
+
+	param.db_numa_node = numa_node;
+	param.buf_numa_node = numa_node;
+	err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
+	if (err)
+		return err;
+	wq->db = &wq->db[MLX5_SND_DBR];
+
+	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
+	sq->dep_wqe = kcalloc(queue->num_entries, sizeof(*sq->dep_wqe), GFP_KERNEL);
+	if (!sq->dep_wqe) {
+		err = -ENOMEM;
+		goto destroy_wq_cyc;
+	}
+
+	sq->wr_priv = kzalloc(sizeof(*sq->wr_priv) * buf_sz, GFP_KERNEL);
+	if (!sq->wr_priv) {
+		err = -ENOMEM;
+		goto free_dep_wqe;
+	}
+
+	sq->buf_mask = (queue->num_entries * MAX_WQES_PER_RULE) - 1;
+
+	return 0;
+
+free_dep_wqe:
+	kfree(sq->dep_wqe);
+destroy_wq_cyc:
+	mlx5_wq_destroy(&sq->wq_ctrl);
+	return err;
+}
+
+static void hws_send_ring_free_sq(struct mlx5hws_send_ring_sq *sq)
+{
+	if (!sq)
+		return;
+	kfree(sq->wr_priv);
+	kfree(sq->dep_wqe);
+	mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn,
+				   void *sqc_data,
+				   struct mlx5hws_send_engine *queue,
+				   struct mlx5hws_send_ring_sq *sq,
+				   struct mlx5hws_send_ring_cq *cq)
+{
+	void *in, *sqc, *wq;
+	int inlen, err;
+	u8 ts_format;
+
+	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+		sizeof(u64) * sq->wq_ctrl.buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+	wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
+	MLX5_SET(sqc, sqc, cqn, cq->mcq.cqn);
+
+	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+	ts_format = mlx5_is_real_time_sq(mdev) ? MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+						 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+	MLX5_SET(sqc, sqc, ts_format, ts_format);
+
+	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+	MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
+	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
+
+	mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+
+	kvfree(in);
+
+	return err;
+}
+
+static int hws_send_ring_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
+{
+	void *in, *sqc;
+	int inlen, err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
+	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+
+	err = mlx5_core_modify_sq(mdev, sqn, in);
+
+	kvfree(in);
+
+	return err;
+}
+
+static void hws_send_ring_close_sq(struct mlx5hws_send_ring_sq *sq)
+{
+	mlx5_core_destroy_sq(sq->mdev, sq->sqn);
+	mlx5_wq_destroy(&sq->wq_ctrl);
+	kfree(sq->wr_priv);
+	kfree(sq->dep_wqe);
+}
+
+static int hws_send_ring_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
+				       void *sqc_data,
+				       struct mlx5hws_send_engine *queue,
+				       struct mlx5hws_send_ring_sq *sq,
+				       struct mlx5hws_send_ring_cq *cq)
+{
+	int err;
+
+	err = hws_send_ring_create_sq(mdev, pdn, sqc_data, queue, sq, cq);
+	if (err)
+		return err;
+
+	err = hws_send_ring_set_sq_rdy(mdev, sq->sqn);
+	if (err)
+		hws_send_ring_close_sq(sq);
+
+	return err;
+}
+
+static int hws_send_ring_open_sq(struct mlx5hws_context *ctx,
+				 int numa_node,
+				 struct mlx5hws_send_engine *queue,
+				 struct mlx5hws_send_ring_sq *sq,
+				 struct mlx5hws_send_ring_cq *cq)
+{
+	size_t buf_sz, sq_log_buf_sz;
+	void *sqc_data, *wq;
+	int err;
+
+	sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
+	if (!sqc_data)
+		return -ENOMEM;
+
+	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
+	sq_log_buf_sz = ilog2(roundup_pow_of_two(buf_sz));
+
+	wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+	MLX5_SET(wq, wq, pd, ctx->pd_num);
+	MLX5_SET(wq, wq, log_wq_sz, sq_log_buf_sz);
+
+	err = hws_send_ring_alloc_sq(ctx->mdev, numa_node, queue, sq, sqc_data);
+	if (err)
+		goto err_free_sqc;
+
+	err = hws_send_ring_create_sq_rdy(ctx->mdev, ctx->pd_num, sqc_data,
+					  queue, sq, cq);
+	if (err)
+		goto err_free_sq;
+
+	kvfree(sqc_data);
+
+	return 0;
+err_free_sq:
+	hws_send_ring_free_sq(sq);
+err_free_sqc:
+	kvfree(sqc_data);
+	return err;
+}
+
+static void hws_cq_complete(struct mlx5_core_cq *mcq,
+			    struct mlx5_eqe *eqe)
+{
+	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
+}
+
+static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev,
+				  int numa_node,
+				  struct mlx5hws_send_engine *queue,
+				  void *cqc_data,
+				  struct mlx5hws_send_ring_cq *cq)
+{
+	struct mlx5_core_cq *mcq = &cq->mcq;
+	struct mlx5_wq_param param;
+	struct mlx5_cqe64 *cqe;
+	int err;
+	u32 i;
+
+	param.buf_numa_node = numa_node;
+	param.db_numa_node = numa_node;
+
+	err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
+	if (err)
+		return err;
+
+	mcq->cqe_sz = 64;
+	mcq->set_ci_db = cq->wq_ctrl.db.db;
+	mcq->arm_db = cq->wq_ctrl.db.db + 1;
+	mcq->comp = hws_cq_complete;
+
+	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+		cqe->op_own = 0xf1;
+	}
+
+	cq->mdev = mdev;
+
+	return 0;
+}
+
+static int
hws_send_ring_create_cq(struct mlx5_core_dev *mdev, + struct mlx5hws_send_engine *queue, + void *cqc_data, + struct mlx5hws_send_ring_cq *cq) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_cq *mcq = &cq->mcq; + void *in, *cqc; + int inlen, eqn; + int err; + + err = mlx5_comp_eqn_get(mdev, 0, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc)); + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + return err; +} + +static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev, + struct mlx5hws_send_engine *queue, + int numa_node, + struct mlx5hws_send_ring_cq *cq) +{ + void *cqc_data; + int err; + + cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL); + if (!cqc_data) + return -ENOMEM; + + MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries); + MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries)); + + err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq); + if (err) + goto err_out; + + err = hws_send_ring_create_cq(mdev, queue, cqc_data, cq); + if (err) + goto err_free_cq; + + kvfree(cqc_data); + + return 0; + +err_free_cq: + mlx5_wq_destroy(&cq->wq_ctrl); +err_out: + kvfree(cqc_data); + return err; +} + +static void hws_send_ring_close_cq(struct mlx5hws_send_ring_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static void hws_send_ring_close(struct mlx5hws_send_engine *queue) +{ + hws_send_ring_close_sq(&queue->send_ring.send_sq); + hws_send_ring_close_cq(&queue->send_ring.send_cq); +} + +static int mlx5hws_send_ring_open(struct mlx5hws_context *ctx, + struct mlx5hws_send_engine *queue) +{ + int numa_node = dev_to_node(mlx5_core_dma_dev(ctx->mdev)); + struct mlx5hws_send_ring *ring = &queue->send_ring; + int err; + + err = hws_send_ring_open_cq(ctx->mdev, queue, numa_node, &ring->send_cq); + if (err) + return err; + + err = hws_send_ring_open_sq(ctx, numa_node, queue, &ring->send_sq, + &ring->send_cq); + if (err) + goto close_cq; + + return err; + +close_cq: + hws_send_ring_close_cq(&ring->send_cq); + return err; +} + +void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue) +{ + hws_send_ring_close(queue); + kfree(queue->completed.entries); +} + +int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, + struct mlx5hws_send_engine *queue, + u16 queue_size) +{ + int err; + + mutex_init(&queue->lock); + + queue->num_entries = roundup_pow_of_two(queue_size); + queue->used_entries = 0; + + queue->completed.entries = kcalloc(queue->num_entries, + sizeof(queue->completed.entries[0]), + GFP_KERNEL); + if (!queue->completed.entries) + return -ENOMEM; + + queue->completed.pi = 0; + queue->completed.ci = 0; + queue->completed.mask = queue->num_entries - 1; + err = mlx5hws_send_ring_open(ctx, queue); + if (err) + goto free_completed_entries; + + return 0; + +free_completed_entries: + kfree(queue->completed.entries); + return err; +} + +static 
void __hws_send_queues_close(struct mlx5hws_context *ctx, u16 queues) +{ + while (queues--) + mlx5hws_send_queue_close(&ctx->send_queue[queues]); +} + +static void hws_send_queues_bwc_locks_destroy(struct mlx5hws_context *ctx) +{ + int bwc_queues = ctx->queues - 1; + int i; + + if (!mlx5hws_context_bwc_supported(ctx)) + return; + + for (i = 0; i < bwc_queues; i++) + mutex_destroy(&ctx->bwc_send_queue_locks[i]); + kfree(ctx->bwc_send_queue_locks); +} + +void mlx5hws_send_queues_close(struct mlx5hws_context *ctx) +{ + hws_send_queues_bwc_locks_destroy(ctx); + __hws_send_queues_close(ctx, ctx->queues); + kfree(ctx->send_queue); +} + +static int hws_bwc_send_queues_init(struct mlx5hws_context *ctx) +{ + /* Number of BWC queues is equal to number of the usual HWS queues */ + int bwc_queues = ctx->queues - 1; + int i; + + if (!mlx5hws_context_bwc_supported(ctx)) + return 0; + + ctx->queues += bwc_queues; + + ctx->bwc_send_queue_locks = kcalloc(bwc_queues, + sizeof(*ctx->bwc_send_queue_locks), + GFP_KERNEL); + + if (!ctx->bwc_send_queue_locks) + return -ENOMEM; + + for (i = 0; i < bwc_queues; i++) + mutex_init(&ctx->bwc_send_queue_locks[i]); + + return 0; +} + +int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, + u16 queues, + u16 queue_size) +{ + int err = 0; + u32 i; + + /* Open one extra queue for control path */ + ctx->queues = queues + 1; + + /* open a separate set of queues and locks for bwc API */ + err = hws_bwc_send_queues_init(ctx); + if (err) + return err; + + ctx->send_queue = kcalloc(ctx->queues, sizeof(*ctx->send_queue), GFP_KERNEL); + if (!ctx->send_queue) { + err = -ENOMEM; + goto free_bwc_locks; + } + + for (i = 0; i < ctx->queues; i++) { + err = mlx5hws_send_queue_open(ctx, &ctx->send_queue[i], queue_size); + if (err) + goto close_send_queues; + } + + return 0; + +close_send_queues: + __hws_send_queues_close(ctx, i); + + kfree(ctx->send_queue); + +free_bwc_locks: + hws_send_queues_bwc_locks_destroy(ctx); + + return err; +} + +int mlx5hws_send_queue_action(struct mlx5hws_context *ctx, + u16 queue_id, + u32 actions) +{ + struct mlx5hws_send_ring_sq *send_sq; + struct mlx5hws_send_engine *queue; + bool wait_comp = false; + s64 polled = 0; + + queue = &ctx->send_queue[queue_id]; + send_sq = &queue->send_ring.send_sq; + + switch (actions) { + case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC: + wait_comp = true; + fallthrough; + case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_ASYNC: + if (send_sq->head_dep_idx != send_sq->tail_dep_idx) + /* Send dependent WQEs to drain the queue */ + mlx5hws_send_all_dep_wqe(queue); + else + /* Signal on the last posted WQE */ + mlx5hws_send_engine_flush_queue(queue); + + /* Poll queue until empty */ + while (wait_comp && !mlx5hws_send_engine_empty(queue)) + hws_send_engine_poll_cq(queue, NULL, &polled, 0); + + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +hws_send_wqe_fw(struct mlx5_core_dev *mdev, + u32 pd_num, + struct mlx5hws_send_engine_post_attr *send_attr, + struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl, + void *send_wqe_match_data, + void *send_wqe_match_tag, + void *send_wqe_range_data, + void *send_wqe_range_tag, + bool is_jumbo, + u8 gta_opcode) +{ + bool has_range = send_wqe_range_data || send_wqe_range_tag; + bool has_match = send_wqe_match_data || send_wqe_match_tag; + struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data0 = {0}; + struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data1 = {0}; + struct mlx5hws_wqe_gta_ctrl_seg gta_wqe_ctrl = {0}; + struct mlx5hws_cmd_generate_wqe_attr attr = {0}; + struct mlx5hws_wqe_ctrl_seg 
wqe_ctrl = {0};
+	struct mlx5_cqe64 cqe;
+	u32 flags = 0;
+	int ret;
+
+	/* Set WQE control */
+	wqe_ctrl.opmod_idx_opcode = cpu_to_be32((send_attr->opmod << 24) | send_attr->opcode);
+	wqe_ctrl.qpn_ds = cpu_to_be32((send_attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16);
+	flags |= send_attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
+	wqe_ctrl.flags = cpu_to_be32(flags);
+	wqe_ctrl.imm = cpu_to_be32(send_attr->id);
+
+	/* Set GTA WQE CTRL */
+	memcpy(gta_wqe_ctrl.stc_ix, send_wqe_ctrl->stc_ix, sizeof(send_wqe_ctrl->stc_ix));
+	gta_wqe_ctrl.op_dirix = cpu_to_be32(gta_opcode << 28);
+
+	/* Set GTA match WQE DATA */
+	if (has_match) {
+		if (send_wqe_match_data)
+			memcpy(&gta_wqe_data0, send_wqe_match_data, sizeof(gta_wqe_data0));
+		else
+			hws_send_wqe_set_tag(&gta_wqe_data0, send_wqe_match_tag, is_jumbo);
+
+		gta_wqe_data0.rsvd1_definer = cpu_to_be32(send_attr->match_definer_id << 8);
+		attr.gta_data_0 = (u8 *)&gta_wqe_data0;
+	}
+
+	/* Set GTA range WQE DATA */
+	if (has_range) {
+		if (send_wqe_range_data)
+			memcpy(&gta_wqe_data1, send_wqe_range_data, sizeof(gta_wqe_data1));
+		else
+			hws_send_wqe_set_tag(&gta_wqe_data1, send_wqe_range_tag, false);
+
+		gta_wqe_data1.rsvd1_definer = cpu_to_be32(send_attr->range_definer_id << 8);
+		attr.gta_data_1 = (u8 *)&gta_wqe_data1;
+	}
+
+	attr.pdn = pd_num;
+	attr.wqe_ctrl = (u8 *)&wqe_ctrl;
+	attr.gta_ctrl = (u8 *)&gta_wqe_ctrl;
+
+send_wqe:
+	ret = mlx5hws_cmd_generate_wqe(mdev, &attr, &cqe);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed to write WQE using command");
+		return ret;
+	}
+
+	if ((get_cqe_opcode(&cqe) == MLX5_CQE_REQ) &&
+	    (be32_to_cpu(cqe.byte_cnt) >> 31 == 0)) {
+		*send_attr->used_id = send_attr->id;
+		return 0;
+	}
+
+	/* Retry if the rule failed */
+	if (send_attr->retry_id) {
+		wqe_ctrl.imm = cpu_to_be32(send_attr->retry_id);
+		send_attr->id = send_attr->retry_id;
+		send_attr->retry_id = 0;
+		goto send_wqe;
+	}
+
+	return -1;
+}
+
+void mlx5hws_send_stes_fw(struct mlx5hws_context *ctx,
+			  struct mlx5hws_send_engine *queue,
+			  struct mlx5hws_send_ste_attr *ste_attr)
+{
+	struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr;
+	struct mlx5hws_rule *rule = send_attr->rule;
+	struct mlx5_core_dev *mdev;
+	u16 queue_id;
+	u32 pdn;
+	int ret;
+
+	queue_id = queue - ctx->send_queue;
+	mdev = ctx->mdev;
+	pdn = ctx->pd_num;
+
+	/* Writing through FW can't use the HW fence; therefore, drain the queue */
+	if (send_attr->fence)
+		mlx5hws_send_queue_action(ctx,
+					  queue_id,
+					  MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC);
+
+	if (ste_attr->rtc_1) {
+		send_attr->id = ste_attr->rtc_1;
+		send_attr->used_id = ste_attr->used_id_rtc_1;
+		send_attr->retry_id = ste_attr->retry_rtc_1;
+		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
+				      ste_attr->wqe_ctrl,
+				      ste_attr->wqe_data,
+				      ste_attr->wqe_tag,
+				      ste_attr->range_wqe_data,
+				      ste_attr->range_wqe_tag,
+				      ste_attr->wqe_tag_is_jumbo,
+				      ste_attr->gta_opcode);
+		if (ret)
+			goto fail_rule;
+	}
+
+	if (ste_attr->rtc_0) {
+		send_attr->id = ste_attr->rtc_0;
+		send_attr->used_id = ste_attr->used_id_rtc_0;
+		send_attr->retry_id = ste_attr->retry_rtc_0;
+		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
+				      ste_attr->wqe_ctrl,
+				      ste_attr->wqe_data,
+				      ste_attr->wqe_tag,
+				      ste_attr->range_wqe_data,
+				      ste_attr->range_wqe_tag,
+				      ste_attr->wqe_tag_is_jumbo,
+				      ste_attr->gta_opcode);
+		if (ret)
+			goto fail_rule;
+	}
+
+	/* Increase the status; this only works on a good flow, as the enum
+	 * is arranged in order: creating -> created -> deleting -> deleted
+	 */
+	if (likely(rule))
+		rule->status++;
+
+	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_SUCCESS);
+
+	return;
+
+fail_rule:
+	if (likely(rule))
+		rule->status = !rule->rtc_0 && !rule->rtc_1 ?
+			MLX5HWS_RULE_STATUS_FAILED : MLX5HWS_RULE_STATUS_FAILING;
+
+	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_ERROR);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.h
new file mode 100644
index 000000000000..b50825d6dc53
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_send.h
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#ifndef MLX5HWS_SEND_H_
+#define MLX5HWS_SEND_H_
+
+/* A single operation requires at least two WQEBBs, which means a maximum
+ * of 16 such operations per rule.
+ */
+#define MAX_WQES_PER_RULE 32
+
+enum mlx5hws_wqe_opcode {
+	MLX5HWS_WQE_OPCODE_TBL_ACCESS = 0x2c,
+};
+
+enum mlx5hws_wqe_opmod {
+	MLX5HWS_WQE_OPMOD_GTA_STE = 0,
+	MLX5HWS_WQE_OPMOD_GTA_MOD_ARG = 1,
+};
+
+enum mlx5hws_wqe_gta_opcode {
+	MLX5HWS_WQE_GTA_OP_ACTIVATE = 0,
+	MLX5HWS_WQE_GTA_OP_DEACTIVATE = 1,
+};
+
+enum mlx5hws_wqe_gta_opmod {
+	MLX5HWS_WQE_GTA_OPMOD_STE = 0,
+	MLX5HWS_WQE_GTA_OPMOD_MOD_ARG = 1,
+};
+
+enum mlx5hws_wqe_gta_sz {
+	MLX5HWS_WQE_SZ_GTA_CTRL = 48,
+	MLX5HWS_WQE_SZ_GTA_DATA = 64,
+};
+
+/* WQE Control segment. */
+struct mlx5hws_wqe_ctrl_seg {
+	__be32 opmod_idx_opcode;
+	__be32 qpn_ds;
+	__be32 flags;
+	__be32 imm;
+};
+
+struct mlx5hws_wqe_gta_ctrl_seg {
+	__be32 op_dirix;
+	__be32 stc_ix[5];
+	__be32 rsvd0[6];
+};
+
+struct mlx5hws_wqe_gta_data_seg_ste {
+	__be32 rsvd0_ctr_id;
+	__be32 rsvd1_definer;
+	__be32 rsvd2[3];
+	union {
+		struct {
+			__be32 action[3];
+			__be32 tag[8];
+		};
+		__be32 jumbo[11];
+	};
+};
+
+struct mlx5hws_wqe_gta_data_seg_arg {
+	__be32 action_args[8];
+};
+
+struct mlx5hws_wqe_gta {
+	struct mlx5hws_wqe_gta_ctrl_seg gta_ctrl;
+	union {
+		struct mlx5hws_wqe_gta_data_seg_ste seg_ste;
+		struct mlx5hws_wqe_gta_data_seg_arg seg_arg;
+	};
+};
+
+struct mlx5hws_send_ring_cq {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_cqwq wq;
+	struct mlx5_wq_ctrl wq_ctrl;
+	struct mlx5_core_cq mcq;
+	u16 poll_wqe;
+};
+
+struct mlx5hws_send_ring_priv {
+	struct mlx5hws_rule *rule;
+	void *user_data;
+	u32 num_wqebbs;
+	u32 id;
+	u32 retry_id;
+	u32 *used_id;
+};
+
+struct mlx5hws_send_ring_dep_wqe {
+	struct mlx5hws_wqe_gta_ctrl_seg wqe_ctrl;
+	struct mlx5hws_wqe_gta_data_seg_ste wqe_data;
+	struct mlx5hws_rule *rule;
+	u32 rtc_0;
+	u32 rtc_1;
+	u32 retry_rtc_0;
+	u32 retry_rtc_1;
+	u32 direct_index;
+	void *user_data;
+};
+
+struct mlx5hws_send_ring_sq {
+	struct mlx5_core_dev *mdev;
+	u16 cur_post;
+	u16 buf_mask;
+	struct mlx5hws_send_ring_priv *wr_priv;
+	unsigned int last_idx;
+	struct mlx5hws_send_ring_dep_wqe *dep_wqe;
+	unsigned int head_dep_idx;
+	unsigned int tail_dep_idx;
+	u32 sqn;
+	struct mlx5_wq_cyc wq;
+	struct mlx5_wq_ctrl wq_ctrl;
+	void __iomem *uar_map;
+};
+
+struct mlx5hws_send_ring {
+	struct mlx5hws_send_ring_cq send_cq;
+	struct mlx5hws_send_ring_sq send_sq;
+};
+
+struct mlx5hws_completed_poll_entry {
+	void *user_data;
+	enum mlx5hws_flow_op_status status;
+};
+
+struct mlx5hws_completed_poll {
+	struct mlx5hws_completed_poll_entry *entries;
+	u16 ci;
+	u16 pi;
+	u16 mask;
+};
+
+struct mlx5hws_send_engine {
+	struct mlx5hws_send_ring send_ring;
+	struct mlx5_uars_page *uar; /* Uar is shared between rings of a queue */
+	struct mlx5hws_completed_poll
completed; + u16 used_entries; + u16 num_entries; + bool err; + struct mutex lock; /* Protects the send engine */ +}; + +struct mlx5hws_send_engine_post_ctrl { + struct mlx5hws_send_engine *queue; + struct mlx5hws_send_ring *send_ring; + size_t num_wqebbs; +}; + +struct mlx5hws_send_engine_post_attr { + u8 opcode; + u8 opmod; + u8 notify_hw; + u8 fence; + u8 match_definer_id; + u8 range_definer_id; + size_t len; + struct mlx5hws_rule *rule; + u32 id; + u32 retry_id; + u32 *used_id; + void *user_data; +}; + +struct mlx5hws_send_ste_attr { + u32 rtc_0; + u32 rtc_1; + u32 retry_rtc_0; + u32 retry_rtc_1; + u32 *used_id_rtc_0; + u32 *used_id_rtc_1; + bool wqe_tag_is_jumbo; + u8 gta_opcode; + u32 direct_index; + struct mlx5hws_send_engine_post_attr send_attr; + struct mlx5hws_rule_match_tag *wqe_tag; + struct mlx5hws_rule_match_tag *range_wqe_tag; + struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl; + struct mlx5hws_wqe_gta_data_seg_ste *wqe_data; + struct mlx5hws_wqe_gta_data_seg_ste *range_wqe_data; +}; + +struct mlx5hws_send_ring_dep_wqe * +mlx5hws_send_add_new_dep_wqe(struct mlx5hws_send_engine *queue); + +void mlx5hws_send_abort_new_dep_wqe(struct mlx5hws_send_engine *queue); + +void mlx5hws_send_all_dep_wqe(struct mlx5hws_send_engine *queue); + +void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue); + +int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, + struct mlx5hws_send_engine *queue, + u16 queue_size); + +void mlx5hws_send_queues_close(struct mlx5hws_context *ctx); + +int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, + u16 queues, + u16 queue_size); + +int mlx5hws_send_queue_action(struct mlx5hws_context *ctx, + u16 queue_id, + u32 actions); + +int mlx5hws_send_test(struct mlx5hws_context *ctx, + u16 queues, + u16 queue_size); + +struct mlx5hws_send_engine_post_ctrl +mlx5hws_send_engine_post_start(struct mlx5hws_send_engine *queue); + +void mlx5hws_send_engine_post_req_wqe(struct mlx5hws_send_engine_post_ctrl *ctrl, + char **buf, size_t *len); + +void mlx5hws_send_engine_post_end(struct mlx5hws_send_engine_post_ctrl *ctrl, + struct mlx5hws_send_engine_post_attr *attr); + +void mlx5hws_send_ste(struct mlx5hws_send_engine *queue, + struct mlx5hws_send_ste_attr *ste_attr); + +void mlx5hws_send_stes_fw(struct mlx5hws_context *ctx, + struct mlx5hws_send_engine *queue, + struct mlx5hws_send_ste_attr *ste_attr); + +void mlx5hws_send_engine_flush_queue(struct mlx5hws_send_engine *queue); + +static inline bool mlx5hws_send_engine_empty(struct mlx5hws_send_engine *queue) +{ + struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq; + struct mlx5hws_send_ring_cq *send_cq = &queue->send_ring.send_cq; + + return ((send_sq->cur_post & send_sq->buf_mask) == send_cq->poll_wqe); +} + +static inline bool mlx5hws_send_engine_full(struct mlx5hws_send_engine *queue) +{ + return queue->used_entries >= queue->num_entries; +} + +static inline void mlx5hws_send_engine_inc_rule(struct mlx5hws_send_engine *queue) +{ + queue->used_entries++; +} + +static inline void mlx5hws_send_engine_dec_rule(struct mlx5hws_send_engine *queue) +{ + queue->used_entries--; +} + +static inline void mlx5hws_send_engine_gen_comp(struct mlx5hws_send_engine *queue, + void *user_data, + int comp_status) +{ + struct mlx5hws_completed_poll *comp = &queue->completed; + + comp->entries[comp->pi].status = comp_status; + comp->entries[comp->pi].user_data = user_data; + + comp->pi = (comp->pi + 1) & comp->mask; +} + +static inline bool mlx5hws_send_engine_err(struct mlx5hws_send_engine *queue) +{ + return 
queue->err;
+}
+
+#endif /* MLX5HWS_SEND_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_table.c
new file mode 100644
index 000000000000..8c063a8d87d7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_table.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+
+#include "mlx5hws_internal.h"
+
+u32 mlx5hws_table_get_id(struct mlx5hws_table *tbl)
+{
+	return tbl->ft_id;
+}
+
+static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl,
+					struct mlx5hws_cmd_ft_create_attr *ft_attr)
+{
+	ft_attr->type = tbl->fw_ft_type;
+	if (tbl->type == MLX5HWS_TABLE_TYPE_FDB)
+		ft_attr->level = tbl->ctx->caps->fdb_ft.max_level - 1;
+	else
+		ft_attr->level = tbl->ctx->caps->nic_ft.max_level - 1;
+	ft_attr->rtc_valid = true;
+}
+
+static void hws_table_set_cap_attr(struct mlx5hws_table *tbl,
+				   struct mlx5hws_cmd_ft_create_attr *ft_attr)
+{
+	/* Enabling reformat_en or decap_en for the first flow table
+	 * must be done when all VFs are down.
+	 * However, HWS doesn't know when it is required to create the first FT.
+	 * On the other hand, HWS doesn't use all these FT capabilities at all
+	 * (the API doesn't even provide a way to specify these flags), so we'll
+	 * just set these caps on all the flow tables.
+	 * If HCA_CAP.fdb_dynamic_tunnel is set, this constraint is N/A.
+	 */
+	if (!MLX5_CAP_ESW_FLOWTABLE(tbl->ctx->mdev, fdb_dynamic_tunnel)) {
+		ft_attr->reformat_en = true;
+		ft_attr->decap_en = true;
+	}
+}
+
+static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl)
+{
+	struct mlx5hws_cmd_ft_create_attr ft_attr = {0};
+	struct mlx5hws_cmd_set_fte_attr fte_attr = {0};
+	struct mlx5hws_cmd_forward_tbl *default_miss;
+	struct mlx5hws_cmd_set_fte_dest dest = {0};
+	struct mlx5hws_context *ctx = tbl->ctx;
+	u8 tbl_type = tbl->type;
+
+	if (tbl->type != MLX5HWS_TABLE_TYPE_FDB)
+		return 0;
+
+	if (ctx->common_res[tbl_type].default_miss) {
+		ctx->common_res[tbl_type].default_miss->refcount++;
+		return 0;
+	}
+
+	ft_attr.type = tbl->fw_ft_type;
+	ft_attr.level = tbl->ctx->caps->fdb_ft.max_level; /* The last level */
+	ft_attr.rtc_valid = false;
+
+	dest.destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	dest.destination_id = ctx->caps->eswitch_manager_vport_number;
+
+	fte_attr.action_flags = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	fte_attr.dests_num = 1;
+	fte_attr.dests = &dest;
+
+	default_miss = mlx5hws_cmd_forward_tbl_create(ctx->mdev, &ft_attr, &fte_attr);
+	if (!default_miss) {
+		mlx5hws_err(ctx, "Failed to create default miss table, type: 0x%x\n", tbl_type);
+		return -EINVAL;
+	}
+
+	/* ctx->ctrl_lock must be held here */
+	ctx->common_res[tbl_type].default_miss = default_miss;
+	ctx->common_res[tbl_type].default_miss->refcount++;
+
+	return 0;
+}
+
+/* Called under ctx->ctrl_lock */
+static void hws_table_down_default_fdb_miss_tbl(struct mlx5hws_table *tbl)
+{
+	struct mlx5hws_cmd_forward_tbl *default_miss;
+	struct mlx5hws_context *ctx = tbl->ctx;
+	u8 tbl_type = tbl->type;
+
+	if (tbl->type != MLX5HWS_TABLE_TYPE_FDB)
+		return;
+
+	default_miss = ctx->common_res[tbl_type].default_miss;
+	if (--default_miss->refcount)
+		return;
+
+	mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, default_miss);
+	ctx->common_res[tbl_type].default_miss = NULL;
+}
+
+static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32 ft_id)
+{
+	struct mlx5hws_cmd_ft_modify_attr ft_attr = {0};
+	int
ret; + + if (unlikely(tbl->type != MLX5HWS_TABLE_TYPE_FDB)) + pr_warn("HWS: invalid table type %d\n", tbl->type); + + mlx5hws_cmd_set_attr_connect_miss_tbl(tbl->ctx, + tbl->fw_ft_type, + tbl->type, + &ft_attr); + + ret = mlx5hws_cmd_flow_table_modify(tbl->ctx->mdev, &ft_attr, ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed to connect FT to default FDB FT\n"); + return ret; + } + + return 0; +} + +int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, + struct mlx5hws_table *tbl, + u32 *ft_id) +{ + struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; + int ret; + + hws_table_init_next_ft_attr(tbl, &ft_attr); + hws_table_set_cap_attr(tbl, &ft_attr); + + ret = mlx5hws_cmd_flow_table_create(mdev, &ft_attr, ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed creating default ft\n"); + return ret; + } + + if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { + /* Take/create ref over the default miss */ + ret = hws_table_up_default_fdb_miss_tbl(tbl); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed to get default fdb miss\n"); + goto free_ft_obj; + } + ret = hws_table_connect_to_default_miss_tbl(tbl, *ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed connecting to default miss tbl\n"); + goto down_miss_tbl; + } + } + + return 0; + +down_miss_tbl: + hws_table_down_default_fdb_miss_tbl(tbl); +free_ft_obj: + mlx5hws_cmd_flow_table_destroy(mdev, ft_attr.type, *ft_id); + return ret; +} + +void mlx5hws_table_destroy_default_ft(struct mlx5hws_table *tbl, + u32 ft_id) +{ + mlx5hws_cmd_flow_table_destroy(tbl->ctx->mdev, tbl->fw_ft_type, ft_id); + hws_table_down_default_fdb_miss_tbl(tbl); +} + +static int hws_table_init_check_hws_support(struct mlx5hws_context *ctx, + struct mlx5hws_table *tbl) +{ + if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT)) { + mlx5hws_err(ctx, "HWS not supported, cannot create mlx5hws_table\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int hws_table_init(struct mlx5hws_table *tbl) +{ + struct mlx5hws_context *ctx = tbl->ctx; + int ret; + + ret = hws_table_init_check_hws_support(ctx, tbl); + if (ret) + return ret; + + if (mlx5hws_table_get_fw_ft_type(tbl->type, (u8 *)&tbl->fw_ft_type)) { + pr_warn("HWS: invalid table type %d\n", tbl->type); + return -EOPNOTSUPP; + } + + mutex_lock(&ctx->ctrl_lock); + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &tbl->ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed to create flow table object\n"); + mutex_unlock(&ctx->ctrl_lock); + return ret; + } + + ret = mlx5hws_action_get_default_stc(ctx, tbl->type); + if (ret) + goto tbl_destroy; + + INIT_LIST_HEAD(&tbl->matchers_list); + INIT_LIST_HEAD(&tbl->default_miss.head); + + mutex_unlock(&ctx->ctrl_lock); + + return 0; + +tbl_destroy: + mlx5hws_table_destroy_default_ft(tbl, tbl->ft_id); + mutex_unlock(&ctx->ctrl_lock); + return ret; +} + +static void hws_table_uninit(struct mlx5hws_table *tbl) +{ + mutex_lock(&tbl->ctx->ctrl_lock); + mlx5hws_action_put_default_stc(tbl->ctx, tbl->type); + mlx5hws_table_destroy_default_ft(tbl, tbl->ft_id); + mutex_unlock(&tbl->ctx->ctrl_lock); +} + +struct mlx5hws_table *mlx5hws_table_create(struct mlx5hws_context *ctx, + struct mlx5hws_table_attr *attr) +{ + struct mlx5hws_table *tbl; + int ret; + + if (attr->type > MLX5HWS_TABLE_TYPE_FDB) { + mlx5hws_err(ctx, "Invalid table type %d\n", attr->type); + return NULL; + } + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (!tbl) + return NULL; + + tbl->ctx = ctx; + tbl->type = attr->type; + tbl->level = attr->level; + + ret = hws_table_init(tbl); + if (ret) { + mlx5hws_err(ctx, "Failed to 
initialise table\n"); + goto free_tbl; + } + + mutex_lock(&ctx->ctrl_lock); + list_add(&tbl->tbl_list_node, &ctx->tbl_list); + mutex_unlock(&ctx->ctrl_lock); + + return tbl; + +free_tbl: + kfree(tbl); + return NULL; +} + +int mlx5hws_table_destroy(struct mlx5hws_table *tbl) +{ + struct mlx5hws_context *ctx = tbl->ctx; + int ret; + + mutex_lock(&ctx->ctrl_lock); + if (!list_empty(&tbl->matchers_list)) { + mlx5hws_err(tbl->ctx, "Cannot destroy table containing matchers\n"); + ret = -EBUSY; + goto unlock_err; + } + + if (!list_empty(&tbl->default_miss.head)) { + mlx5hws_err(tbl->ctx, "Cannot destroy table pointed by default miss\n"); + ret = -EBUSY; + goto unlock_err; + } + + list_del_init(&tbl->tbl_list_node); + mutex_unlock(&ctx->ctrl_lock); + + hws_table_uninit(tbl); + kfree(tbl); + + return 0; + +unlock_err: + mutex_unlock(&ctx->ctrl_lock); + return ret; +} + +static u32 hws_table_get_last_ft(struct mlx5hws_table *tbl) +{ + struct mlx5hws_matcher *matcher; + + if (list_empty(&tbl->matchers_list)) + return tbl->ft_id; + + matcher = list_last_entry(&tbl->matchers_list, struct mlx5hws_matcher, list_node); + return matcher->end_ft_id; +} + +int mlx5hws_table_ft_set_default_next_ft(struct mlx5hws_table *tbl, u32 ft_id) +{ + struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; + int ret; + + /* Due to FW limitation, resetting the flow table to default action will + * disconnect RTC when ignore_flow_level_rtc_valid is not supported. + */ + if (!tbl->ctx->caps->nic_ft.ignore_flow_level_rtc_valid) + return 0; + + if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) + return hws_table_connect_to_default_miss_tbl(tbl, ft_id); + + ft_attr.type = tbl->fw_ft_type; + ft_attr.modify_fs = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION; + ft_attr.table_miss_action = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_DEFAULT; + + ret = mlx5hws_cmd_flow_table_modify(tbl->ctx->mdev, &ft_attr, ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Failed to set FT default miss action\n"); + return ret; + } + + return 0; +} + +int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 rtc_0_id, + u32 rtc_1_id) +{ + struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; + + ft_attr.modify_fs = MLX5_IFC_MODIFY_FLOW_TABLE_RTC_ID; + ft_attr.type = fw_ft_type; + ft_attr.rtc_id_0 = rtc_0_id; + ft_attr.rtc_id_1 = rtc_1_id; + + return mlx5hws_cmd_flow_table_modify(ctx->mdev, &ft_attr, ft_id); +} + +static int hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 next_ft_id) +{ + struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; + + ft_attr.modify_fs = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION; + ft_attr.table_miss_action = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL; + ft_attr.type = fw_ft_type; + ft_attr.table_miss_id = next_ft_id; + + return mlx5hws_cmd_flow_table_modify(ctx->mdev, &ft_attr, ft_id); +} + +int mlx5hws_table_update_connected_miss_tables(struct mlx5hws_table *dst_tbl) +{ + struct mlx5hws_table *src_tbl; + int ret; + + if (list_empty(&dst_tbl->default_miss.head)) + return 0; + + list_for_each_entry(src_tbl, &dst_tbl->default_miss.head, default_miss.next) { + ret = mlx5hws_table_connect_to_miss_table(src_tbl, dst_tbl); + if (ret) { + mlx5hws_err(dst_tbl->ctx, + "Failed to update source miss table, unexpected behavior\n"); + return ret; + } + } + + return 0; +} + +int mlx5hws_table_connect_to_miss_table(struct mlx5hws_table *src_tbl, + struct mlx5hws_table *dst_tbl) +{ + struct mlx5hws_matcher *matcher; + u32 last_ft_id; + int ret; + + last_ft_id = hws_table_get_last_ft(src_tbl); 
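+	/* Three cases are handled below:
+	 * 1) dst_tbl exists but has no matchers: point last_ft at dst_tbl's
+	 *    anchor FT and reset last_ft's RTCs to default;
+	 * 2) dst_tbl exists and has matchers: point last_ft's RTCs at the
+	 *    first matcher's RTC pair and reset its miss FT to default;
+	 * 3) no dst_tbl: restore both the default miss FT and the default
+	 *    RTCs.
+	 */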
+ + if (dst_tbl) { + if (list_empty(&dst_tbl->matchers_list)) { + /* Connect src_tbl last_ft to dst_tbl start anchor */ + ret = hws_table_ft_set_next_ft(src_tbl->ctx, + last_ft_id, + src_tbl->fw_ft_type, + dst_tbl->ft_id); + if (ret) + return ret; + + /* Reset last_ft RTC to default RTC */ + ret = mlx5hws_table_ft_set_next_rtc(src_tbl->ctx, + last_ft_id, + src_tbl->fw_ft_type, + 0, 0); + if (ret) + return ret; + } else { + /* Connect src_tbl last_ft to first matcher RTC */ + matcher = list_first_entry(&dst_tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + ret = mlx5hws_table_ft_set_next_rtc(src_tbl->ctx, + last_ft_id, + src_tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) + return ret; + + /* Reset next miss FT to default */ + ret = mlx5hws_table_ft_set_default_next_ft(src_tbl, last_ft_id); + if (ret) + return ret; + } + } else { + /* Reset next miss FT to default */ + ret = mlx5hws_table_ft_set_default_next_ft(src_tbl, last_ft_id); + if (ret) + return ret; + + /* Reset last_ft RTC to default RTC */ + ret = mlx5hws_table_ft_set_next_rtc(src_tbl->ctx, + last_ft_id, + src_tbl->fw_ft_type, + 0, 0); + if (ret) + return ret; + } + + src_tbl->default_miss.miss_tbl = dst_tbl; + + return 0; +} + +static int hws_table_set_default_miss_not_valid(struct mlx5hws_table *tbl, + struct mlx5hws_table *miss_tbl) +{ + if (!tbl->ctx->caps->nic_ft.ignore_flow_level_rtc_valid) { + mlx5hws_err(tbl->ctx, "Default miss table is not supported\n"); + return -EOPNOTSUPP; + } + + if ((miss_tbl && miss_tbl->type != tbl->type)) { + mlx5hws_err(tbl->ctx, "Invalid arguments\n"); + return -EINVAL; + } + + return 0; +} + +int mlx5hws_table_set_default_miss(struct mlx5hws_table *tbl, + struct mlx5hws_table *miss_tbl) +{ + struct mlx5hws_context *ctx = tbl->ctx; + struct mlx5hws_table *old_miss_tbl; + int ret; + + ret = hws_table_set_default_miss_not_valid(tbl, miss_tbl); + if (ret) + return ret; + + mutex_lock(&ctx->ctrl_lock); + + old_miss_tbl = tbl->default_miss.miss_tbl; + ret = mlx5hws_table_connect_to_miss_table(tbl, miss_tbl); + if (ret) + goto out; + + if (old_miss_tbl) + list_del_init(&tbl->default_miss.next); + + old_miss_tbl = tbl->default_miss.miss_tbl; + if (old_miss_tbl) + list_del_init(&old_miss_tbl->default_miss.head); + + if (miss_tbl) + list_add(&tbl->default_miss.next, &miss_tbl->default_miss.head); + + mutex_unlock(&ctx->ctrl_lock); + return 0; +out: + mutex_unlock(&ctx->ctrl_lock); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_table.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_table.h new file mode 100644 index 000000000000..dd50420eec9e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_table.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_TABLE_H_ +#define MLX5HWS_TABLE_H_ + +struct mlx5hws_default_miss { + /* My miss table */ + struct mlx5hws_table *miss_tbl; + struct list_head next; + /* Tables missing to my table */ + struct list_head head; +}; + +struct mlx5hws_table { + struct mlx5hws_context *ctx; + u32 ft_id; + enum mlx5hws_table_type type; + u32 fw_ft_type; + u32 level; + struct list_head matchers_list; + struct list_head tbl_list_node; + struct mlx5hws_default_miss default_miss; +}; + +static inline +u32 mlx5hws_table_get_fw_ft_type(enum mlx5hws_table_type type, + u8 *ret_type) +{ + if (type != MLX5HWS_TABLE_TYPE_FDB) + return -EOPNOTSUPP; + + 
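+	/* FDB is the only table type this API exposes for now */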
*ret_type = FS_FT_FDB; + + return 0; +} + +static inline +u32 mlx5hws_table_get_res_fw_ft_type(enum mlx5hws_table_type tbl_type, + bool is_mirror) +{ + if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) + return is_mirror ? FS_FT_FDB_TX : FS_FT_FDB_RX; + + return 0; +} + +int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, + struct mlx5hws_table *tbl, + u32 *ft_id); + +void mlx5hws_table_destroy_default_ft(struct mlx5hws_table *tbl, + u32 ft_id); + +int mlx5hws_table_connect_to_miss_table(struct mlx5hws_table *src_tbl, + struct mlx5hws_table *dst_tbl); + +int mlx5hws_table_update_connected_miss_tables(struct mlx5hws_table *dst_tbl); + +int mlx5hws_table_ft_set_default_next_ft(struct mlx5hws_table *tbl, u32 ft_id); + +int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 rtc_0_id, + u32 rtc_1_id); + +#endif /* MLX5HWS_TABLE_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_vport.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_vport.c new file mode 100644 index 000000000000..faf42421c43f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_vport.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include "mlx5hws_internal.h" + +int mlx5hws_vport_init_vports(struct mlx5hws_context *ctx) +{ + int ret; + + if (!ctx->caps->eswitch_manager) + return 0; + + xa_init(&ctx->vports.vport_gvmi_xa); + + /* Set gvmi for eswitch manager and uplink vports only. Rest of the vports + * (vport 0 of other function, VFs and SFs) will be queried dynamically. + */ + + ret = mlx5hws_cmd_query_gvmi(ctx->mdev, false, 0, &ctx->vports.esw_manager_gvmi); + if (ret) + return ret; + + ctx->vports.uplink_gvmi = 0; + return 0; +} + +void mlx5hws_vport_uninit_vports(struct mlx5hws_context *ctx) +{ + if (ctx->caps->eswitch_manager) + xa_destroy(&ctx->vports.vport_gvmi_xa); +} + +static int hws_vport_add_gvmi(struct mlx5hws_context *ctx, u16 vport) +{ + u16 vport_gvmi; + int ret; + + ret = mlx5hws_cmd_query_gvmi(ctx->mdev, true, vport, &vport_gvmi); + if (ret) + return -EINVAL; + + ret = xa_insert(&ctx->vports.vport_gvmi_xa, vport, + xa_mk_value(vport_gvmi), GFP_KERNEL); + if (ret) + mlx5hws_dbg(ctx, "Couldn't insert new vport gvmi into xarray (%d)\n", ret); + + return ret; +} + +static bool hws_vport_is_esw_mgr_vport(struct mlx5hws_context *ctx, u16 vport) +{ + return ctx->caps->is_ecpf ? 
vport == MLX5_VPORT_ECPF : + vport == MLX5_VPORT_PF; +} + +int mlx5hws_vport_get_gvmi(struct mlx5hws_context *ctx, u16 vport, u16 *vport_gvmi) +{ + void *entry; + int ret; + + if (!ctx->caps->eswitch_manager) + return -EINVAL; + + if (hws_vport_is_esw_mgr_vport(ctx, vport)) { + *vport_gvmi = ctx->vports.esw_manager_gvmi; + return 0; + } + + if (vport == MLX5_VPORT_UPLINK) { + *vport_gvmi = ctx->vports.uplink_gvmi; + return 0; + } + +load_entry: + entry = xa_load(&ctx->vports.vport_gvmi_xa, vport); + + if (!xa_is_value(entry)) { + ret = hws_vport_add_gvmi(ctx, vport); + if (ret && ret != -EBUSY) + return ret; + goto load_entry; + } + + *vport_gvmi = (u16)xa_to_value(entry); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_vport.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_vport.h new file mode 100644 index 000000000000..0912fc166b3a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_vport.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef MLX5HWS_VPORT_H_ +#define MLX5HWS_VPORT_H_ + +int mlx5hws_vport_init_vports(struct mlx5hws_context *ctx); + +void mlx5hws_vport_uninit_vports(struct mlx5hws_context *ctx); + +int mlx5hws_vport_get_gvmi(struct mlx5hws_context *ctx, u16 vport, u16 *vport_gvmi); + +#endif /* MLX5HWS_VPORT_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index bc94e75a7aeb..e7777700ee18 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -40,6 +40,7 @@ */ #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0 #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1 +#define MLXBF_GIGE_MAX_FILTER_IDX 3 /* Define for broadcast MAC literal */ #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF @@ -175,6 +176,13 @@ enum mlxbf_gige_res { int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv); void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); + +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 dmac); void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index b157f0f1c5a8..385a56ac7348 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev) if (err) goto napi_deinit; + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); + mlxbf_gige_enable_multicast_rx(priv); + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -379,6 +383,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) void __iomem *plu_base; void __iomem *base; int addr, phy_irq; + unsigned int i; int err; base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC); @@ -423,6 +428,11 @@ static int 
mlxbf_gige_probe(struct platform_device *pdev) priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ; priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ; + for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++) + mlxbf_gige_disable_mac_rx_filter(priv, i); + mlxbf_gige_disable_multicast_rx(priv); + mlxbf_gige_disable_promisc(priv); + /* Write initial MAC address to hardware */ mlxbf_gige_initial_mac(priv); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index 98a8681c21b9..4d14cb13fd64 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -62,6 +62,8 @@ #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START 0x0520 #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END 0x0528 +#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL 0x0530 +#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC 0x0540 #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN BIT(0) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS 0x0548 diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index 699984358493..eb62620b63c7 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -11,15 +11,31 @@ #include "mlxbf_gige.h" #include "mlxbf_gige_regs.h" -void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, - unsigned int index, u64 dmac) +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv) { void __iomem *base = priv->base; - u64 control; + u64 data; - /* Write destination MAC to specified MAC RX filter */ - writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + - (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 data; + + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; /* Enable MAC receive filter mask for specified index */ control = readq(base + MLXBF_GIGE_CONTROL); @@ -27,6 +43,28 @@ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, writeq(control, base + MLXBF_GIGE_CONTROL); } +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; + + /* Disable MAC receive filter mask for specified index */ + control = readq(base + MLXBF_GIGE_CONTROL); + control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); + writeq(control, base + MLXBF_GIGE_CONTROL); +} + +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac) +{ + void __iomem *base = priv->base; + + /* Write destination MAC to specified MAC RX filter */ + writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + + (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); +} + void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 *dmac) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 
d61478c0c632..e746cd9c68ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -165,52 +165,22 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, return -ENODEV; } -static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, - struct thermal_cooling_device *cdev) +static bool mlxsw_thermal_should_bind(struct thermal_zone_device *tzdev, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + struct cooling_spec *c) { struct mlxsw_thermal *thermal = thermal_zone_device_priv(tzdev); - struct device *dev = thermal->bus_info->dev; - int i, err; + const struct mlxsw_cooling_states *state = trip->priv; /* If the cooling device is one of ours bind it */ if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) - return 0; + return false; - for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { - const struct mlxsw_cooling_states *state = &thermal->cooling_states[i]; + c->upper = state->max_state; + c->lower = state->min_state; - err = thermal_zone_bind_cooling_device(tzdev, i, cdev, - state->max_state, - state->min_state, - THERMAL_WEIGHT_DEFAULT); - if (err < 0) { - dev_err(dev, "Failed to bind cooling device to trip %d\n", i); - return err; - } - } - return 0; -} - -static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev, - struct thermal_cooling_device *cdev) -{ - struct mlxsw_thermal *thermal = thermal_zone_device_priv(tzdev); - struct device *dev = thermal->bus_info->dev; - int i; - int err; - - /* If the cooling device is our one unbind it */ - if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) - return 0; - - for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { - err = thermal_zone_unbind_cooling_device(tzdev, i, cdev); - if (err < 0) { - dev_err(dev, "Failed to unbind cooling device\n"); - return err; - } - } - return 0; + return true; } static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, @@ -240,57 +210,27 @@ static struct thermal_zone_params mlxsw_thermal_params = { }; static struct thermal_zone_device_ops mlxsw_thermal_ops = { - .bind = mlxsw_thermal_bind, - .unbind = mlxsw_thermal_unbind, + .should_bind = mlxsw_thermal_should_bind, .get_temp = mlxsw_thermal_get_temp, }; -static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, - struct thermal_cooling_device *cdev) +static bool mlxsw_thermal_module_should_bind(struct thermal_zone_device *tzdev, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + struct cooling_spec *c) { struct mlxsw_thermal_module *tz = thermal_zone_device_priv(tzdev); + const struct mlxsw_cooling_states *state = trip->priv; struct mlxsw_thermal *thermal = tz->parent; - int i, j, err; /* If the cooling device is one of ours bind it */ if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) - return 0; - - for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { - const struct mlxsw_cooling_states *state = &tz->cooling_states[i]; - - err = thermal_zone_bind_cooling_device(tzdev, i, cdev, - state->max_state, - state->min_state, - THERMAL_WEIGHT_DEFAULT); - if (err < 0) - goto err_thermal_zone_bind_cooling_device; - } - return 0; - -err_thermal_zone_bind_cooling_device: - for (j = i - 1; j >= 0; j--) - thermal_zone_unbind_cooling_device(tzdev, j, cdev); - return err; -} - -static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev, - struct thermal_cooling_device *cdev) -{ - struct mlxsw_thermal_module *tz = thermal_zone_device_priv(tzdev); - struct mlxsw_thermal *thermal = tz->parent; - int i; - int 
err; + return false; - /* If the cooling device is one of ours unbind it */ - if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) - return 0; + c->upper = state->max_state; + c->lower = state->min_state; - for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { - err = thermal_zone_unbind_cooling_device(tzdev, i, cdev); - WARN_ON(err); - } - return err; + return true; } static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, @@ -313,8 +253,7 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, } static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { - .bind = mlxsw_thermal_module_bind, - .unbind = mlxsw_thermal_module_unbind, + .should_bind = mlxsw_thermal_module_should_bind, .get_temp = mlxsw_thermal_module_temp_get, }; @@ -342,8 +281,7 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, } static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { - .bind = mlxsw_thermal_module_bind, - .unbind = mlxsw_thermal_module_unbind, + .should_bind = mlxsw_thermal_module_should_bind, .get_temp = mlxsw_thermal_gearbox_temp_get, }; @@ -411,7 +349,7 @@ static const struct thermal_cooling_device_ops mlxsw_cooling_ops = { static int mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) { - char tz_name[THERMAL_NAME_LENGTH]; + char tz_name[40]; int err; if (module_tz->slot_index) @@ -445,17 +383,14 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) thermal_zone_device_unregister(tzdev); } -static void -mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal *thermal, +static int +mlxsw_thermal_module_init(struct mlxsw_thermal *thermal, struct mlxsw_thermal_area *area, u8 module) { struct mlxsw_thermal_module *module_tz; + int i; module_tz = &area->tz_module_arr[module]; - /* Skip if parent is already set (case of port split). 
*/ - if (module_tz->parent) - return; module_tz->module = module; module_tz->slot_index = area->slot_index; module_tz->parent = thermal; @@ -465,15 +400,15 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, sizeof(thermal->trips)); memcpy(module_tz->cooling_states, default_cooling_states, sizeof(thermal->cooling_states)); + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) + module_tz->trips[i].priv = &module_tz->cooling_states[i]; + + return mlxsw_thermal_module_tz_init(module_tz); } static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) { - if (module_tz && module_tz->tzdev) { - mlxsw_thermal_module_tz_fini(module_tz->tzdev); - module_tz->tzdev = NULL; - module_tz->parent = NULL; - } + mlxsw_thermal_module_tz_fini(module_tz->tzdev); } static int @@ -481,7 +416,6 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal, struct mlxsw_thermal_area *area) { - struct mlxsw_thermal_module *module_tz; char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; @@ -503,22 +437,16 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, if (!area->tz_module_arr) return -ENOMEM; - for (i = 0; i < area->tz_module_num; i++) - mlxsw_thermal_module_init(dev, core, thermal, area, i); - for (i = 0; i < area->tz_module_num; i++) { - module_tz = &area->tz_module_arr[i]; - if (!module_tz->parent) - continue; - err = mlxsw_thermal_module_tz_init(module_tz); + err = mlxsw_thermal_module_init(thermal, area, i); if (err) - goto err_thermal_module_tz_init; + goto err_thermal_module_init; } return 0; -err_thermal_module_tz_init: - for (i = area->tz_module_num - 1; i >= 0; i--) +err_thermal_module_init: + for (i--; i >= 0; i--) mlxsw_thermal_module_fini(&area->tz_module_arr[i]); kfree(area->tz_module_arr); return err; @@ -579,7 +507,7 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal_module *gearbox_tz; char mgpir_pl[MLXSW_REG_MGPIR_LEN]; u8 gbox_num; - int i; + int i, j; int err; mlxsw_reg_mgpir_pack(mgpir_pl, area->slot_index); @@ -606,6 +534,9 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, sizeof(thermal->trips)); memcpy(gearbox_tz->cooling_states, default_cooling_states, sizeof(thermal->cooling_states)); + for (j = 0; j < MLXSW_THERMAL_NUM_TRIPS; j++) + gearbox_tz->trips[j].priv = &gearbox_tz->cooling_states[j]; + gearbox_tz->module = i; gearbox_tz->parent = thermal; gearbox_tz->slot_index = area->slot_index; @@ -722,6 +653,9 @@ int mlxsw_thermal_init(struct mlxsw_core *core, thermal->bus_info = bus_info; memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips)); memcpy(thermal->cooling_states, default_cooling_states, sizeof(thermal->cooling_states)); + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) + thermal->trips[i].priv = &thermal->cooling_states[i]; + thermal->line_cards[0].slot_index = 0; err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl); @@ -821,10 +755,7 @@ err_linecards_event_ops_register: err_thermal_gearboxes_init: mlxsw_thermal_modules_fini(thermal, &thermal->line_cards[0]); err_thermal_modules_init: - if (thermal->tzdev) { - thermal_zone_device_unregister(thermal->tzdev); - thermal->tzdev = NULL; - } + thermal_zone_device_unregister(thermal->tzdev); err_thermal_zone_device_register: err_thermal_cooling_device_register: for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) @@ -845,10 +776,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) thermal); mlxsw_thermal_gearboxes_fini(thermal, 
&thermal->line_cards[0]); mlxsw_thermal_modules_fini(thermal, &thermal->line_cards[0]); - if (thermal->tzdev) { - thermal_zone_device_unregister(thermal->tzdev); - thermal->tzdev = NULL; - } + thermal_zone_device_unregister(thermal->tzdev); for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) thermal_cooling_device_unregister(thermal->cdevs[i].cdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f064789f3240..3f5e5d99251b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1676,9 +1676,11 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, netif_carrier_off(dev); - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | - NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; + dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_TC; dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; + dev->lltx = true; + dev->netns_local = true; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = MLXSW_PORT_MAX_MTU - MLXSW_PORT_ETH_FRAME_HDR; @@ -2784,7 +2786,9 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get, .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, .shaper_work = mlxsw_sp1_ptp_shaper_work, +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) .get_ts_info = mlxsw_sp1_ptp_get_ts_info, +#endif .get_stats_count = mlxsw_sp1_get_stats_count, .get_stats_strings = mlxsw_sp1_get_stats_strings, .get_stats = mlxsw_sp1_get_stats, @@ -2801,7 +2805,9 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, .shaper_work = mlxsw_sp2_ptp_shaper_work, +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) .get_ts_info = mlxsw_sp2_ptp_get_ts_info, +#endif .get_stats_count = mlxsw_sp2_get_stats_count, .get_stats_strings = mlxsw_sp2_get_stats_strings, .get_stats = mlxsw_sp2_get_stats, @@ -2818,7 +2824,9 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp4_ptp_ops = { .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, .shaper_work = mlxsw_sp2_ptp_shaper_work, +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) .get_ts_info = mlxsw_sp2_ptp_get_ts_info, +#endif .get_stats_count = mlxsw_sp2_get_stats_count, .get_stats_strings = mlxsw_sp2_get_stats_strings, .get_stats = mlxsw_sp2_get_stats, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index 769095d4932d..c8aa1452fbb9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -11,14 +11,6 @@ struct mlxsw_sp; struct mlxsw_sp_port; struct mlxsw_sp_ptp_clock; -static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct kernel_ethtool_ts_info *info) -{ - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; - return 0; -} - #if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) struct mlxsw_sp_ptp_clock * @@ -151,12 +143,6 @@ static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) { } -static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct kernel_ethtool_ts_info *info) -{ - return mlxsw_sp_ptp_get_ts_info_noptp(info); -} - static inline int mlxsw_sp1_get_stats_count(void) { return 0; @@ -226,12 +212,6 @@ mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } -static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct 
kernel_ethtool_ts_info *info) -{ - return mlxsw_sp_ptp_get_ts_info_noptp(info); -} - static inline int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, struct mlxsw_sp_port *mlxsw_sp_port, diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 86034ea4ba5b..85519690b837 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -20,9 +20,11 @@ if NET_VENDOR_META config FBNIC tristate "Meta Platforms Host Network Interface" depends on X86_64 || COMPILE_TEST - depends on S390=n + depends on !S390 depends on MAX_SKB_FRAGS < 22 depends on PCI_MSI + select NET_DEVLINK + select PAGE_POOL select PHYLINK help This driver supports Meta Platforms Host Network Interface. diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile index 9373b558fdc9..ed4533a73c57 100644 --- a/drivers/net/ethernet/meta/fbnic/Makefile +++ b/drivers/net/ethernet/meta/fbnic/Makefile @@ -8,7 +8,9 @@ obj-$(CONFIG_FBNIC) += fbnic.o fbnic-y := fbnic_devlink.o \ + fbnic_ethtool.o \ fbnic_fw.o \ + fbnic_hw_stats.o \ fbnic_irq.o \ fbnic_mac.o \ fbnic_netdev.o \ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index ad2689bfd6cb..0f9e8d79461c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -11,6 +11,7 @@ #include "fbnic_csr.h" #include "fbnic_fw.h" +#include "fbnic_hw_stats.h" #include "fbnic_mac.h" #include "fbnic_rpc.h" @@ -47,6 +48,9 @@ struct fbnic_dev { /* Number of TCQs/RCQs available on hardware */ u16 max_num_queues; + + /* Local copy of hardware statistics */ + struct fbnic_hw_stats hw_stats; }; /* Reserve entry 0 in the MSI-X "others" array until we have filled all @@ -132,6 +136,9 @@ void fbnic_free_irq(struct fbnic_dev *dev, int nr, void *data); void fbnic_free_irqs(struct fbnic_dev *fbd); int fbnic_alloc_irqs(struct fbnic_dev *fbd); +void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, + const size_t str_sz); + enum fbnic_boards { fbnic_board_asic }; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index a64360de0552..21db509acbc1 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -660,6 +660,43 @@ enum { #define FBNIC_SIG_PCS_INTR_MASK 0x11816 /* 0x46058 */ #define FBNIC_CSR_END_SIG 0x1184e /* CSR section delimiter */ +#define FBNIC_CSR_START_MAC_STAT 0x11a00 +#define FBNIC_MAC_STAT_RX_BYTE_COUNT_L 0x11a08 /* 0x46820 */ +#define FBNIC_MAC_STAT_RX_BYTE_COUNT_H 0x11a09 /* 0x46824 */ +#define FBNIC_MAC_STAT_RX_ALIGN_ERROR_L \ + 0x11a0a /* 0x46828 */ +#define FBNIC_MAC_STAT_RX_ALIGN_ERROR_H \ + 0x11a0b /* 0x4682c */ +#define FBNIC_MAC_STAT_RX_TOOLONG_L 0x11a0e /* 0x46838 */ +#define FBNIC_MAC_STAT_RX_TOOLONG_H 0x11a0f /* 0x4683c */ +#define FBNIC_MAC_STAT_RX_RECEIVED_OK_L \ + 0x11a12 /* 0x46848 */ +#define FBNIC_MAC_STAT_RX_RECEIVED_OK_H \ + 0x11a13 /* 0x4684c */ +#define FBNIC_MAC_STAT_RX_PACKET_BAD_FCS_L \ + 0x11a14 /* 0x46850 */ +#define FBNIC_MAC_STAT_RX_PACKET_BAD_FCS_H \ + 0x11a15 /* 0x46854 */ +#define FBNIC_MAC_STAT_RX_IFINERRORS_L 0x11a18 /* 0x46860 */ +#define FBNIC_MAC_STAT_RX_IFINERRORS_H 0x11a19 /* 0x46864 */ +#define FBNIC_MAC_STAT_RX_MULTICAST_L 0x11a1c /* 0x46870 */ +#define FBNIC_MAC_STAT_RX_MULTICAST_H 0x11a1d /* 0x46874 */ +#define FBNIC_MAC_STAT_RX_BROADCAST_L 0x11a1e /* 0x46878 */ +#define FBNIC_MAC_STAT_RX_BROADCAST_H 0x11a1f /* 0x4687c */ +#define 
FBNIC_MAC_STAT_TX_BYTE_COUNT_L 0x11a3e /* 0x468f8 */ +#define FBNIC_MAC_STAT_TX_BYTE_COUNT_H 0x11a3f /* 0x468fc */ +#define FBNIC_MAC_STAT_TX_TRANSMITTED_OK_L \ + 0x11a42 /* 0x46908 */ +#define FBNIC_MAC_STAT_TX_TRANSMITTED_OK_H \ + 0x11a43 /* 0x4690c */ +#define FBNIC_MAC_STAT_TX_IFOUTERRORS_L \ + 0x11a46 /* 0x46918 */ +#define FBNIC_MAC_STAT_TX_IFOUTERRORS_H \ + 0x11a47 /* 0x4691c */ +#define FBNIC_MAC_STAT_TX_MULTICAST_L 0x11a4a /* 0x46928 */ +#define FBNIC_MAC_STAT_TX_MULTICAST_H 0x11a4b /* 0x4692c */ +#define FBNIC_MAC_STAT_TX_BROADCAST_L 0x11a4c /* 0x46930 */ +#define FBNIC_MAC_STAT_TX_BROADCAST_H 0x11a4d /* 0x46934 */ /* PUL User Registers */ #define FBNIC_CSR_START_PUL_USER 0x31000 /* CSR section delimiter */ #define FBNIC_PUL_OB_TLP_HDR_AW_CFG 0x3103d /* 0xc40f4 */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index e87049dfd223..ef05ae8f5039 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -10,6 +10,56 @@ #define FBNIC_SN_STR_LEN 24 +static int fbnic_version_running_put(struct devlink_info_req *req, + struct fbnic_fw_ver *fw_ver, + char *ver_name) +{ + char running_ver[FBNIC_FW_VER_MAX_SIZE]; + int err; + + fbnic_mk_fw_ver_str(fw_ver->version, running_ver); + err = devlink_info_version_running_put(req, ver_name, running_ver); + if (err) + return err; + + if (strlen(fw_ver->commit) > 0) { + char commit_name[FBNIC_SN_STR_LEN]; + + snprintf(commit_name, FBNIC_SN_STR_LEN, "%s.commit", ver_name); + err = devlink_info_version_running_put(req, commit_name, + fw_ver->commit); + if (err) + return err; + } + + return 0; +} + +static int fbnic_version_stored_put(struct devlink_info_req *req, + struct fbnic_fw_ver *fw_ver, + char *ver_name) +{ + char stored_ver[FBNIC_FW_VER_MAX_SIZE]; + int err; + + fbnic_mk_fw_ver_str(fw_ver->version, stored_ver); + err = devlink_info_version_stored_put(req, ver_name, stored_ver); + if (err) + return err; + + if (strlen(fw_ver->commit) > 0) { + char commit_name[FBNIC_SN_STR_LEN]; + + snprintf(commit_name, FBNIC_SN_STR_LEN, "%s.commit", ver_name); + err = devlink_info_version_stored_put(req, commit_name, + fw_ver->commit); + if (err) + return err; + } + + return 0; +} + static int fbnic_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -17,6 +67,31 @@ static int fbnic_devlink_info_get(struct devlink *devlink, struct fbnic_dev *fbd = devlink_priv(devlink); int err; + err = fbnic_version_running_put(req, &fbd->fw_cap.running.mgmt, + DEVLINK_INFO_VERSION_GENERIC_FW); + if (err) + return err; + + err = fbnic_version_running_put(req, &fbd->fw_cap.running.bootloader, + DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER); + if (err) + return err; + + err = fbnic_version_stored_put(req, &fbd->fw_cap.stored.mgmt, + DEVLINK_INFO_VERSION_GENERIC_FW); + if (err) + return err; + + err = fbnic_version_stored_put(req, &fbd->fw_cap.stored.bootloader, + DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER); + if (err) + return err; + + err = fbnic_version_stored_put(req, &fbd->fw_cap.stored.undi, + DEVLINK_INFO_VERSION_GENERIC_FW_UNDI); + if (err) + return err; + if (fbd->dsn) { unsigned char serial[FBNIC_SN_STR_LEN]; u8 dsn[8]; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c new file mode 100644 index 000000000000..5d980e178941 --- /dev/null +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -0,0 +1,75 @@ +#include <linux/ethtool.h> 
+#include <linux/netdevice.h> +#include <linux/pci.h> + +#include "fbnic.h" +#include "fbnic_netdev.h" +#include "fbnic_tlv.h" + +static void +fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + fbnic_get_fw_ver_commit_str(fbd, drvinfo->fw_version, + sizeof(drvinfo->fw_version)); +} + +static void fbnic_set_counter(u64 *stat, struct fbnic_stat_counter *counter) +{ + if (counter->reported) + *stat = counter->value; +} + +static void +fbnic_get_eth_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *eth_mac_stats) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_mac_stats *mac_stats; + struct fbnic_dev *fbd = fbn->fbd; + const struct fbnic_mac *mac; + + mac_stats = &fbd->hw_stats.mac; + mac = fbd->mac; + + mac->get_eth_mac_stats(fbd, false, &mac_stats->eth_mac); + + fbnic_set_counter(ð_mac_stats->FramesTransmittedOK, + &mac_stats->eth_mac.FramesTransmittedOK); + fbnic_set_counter(ð_mac_stats->FramesReceivedOK, + &mac_stats->eth_mac.FramesReceivedOK); + fbnic_set_counter(ð_mac_stats->FrameCheckSequenceErrors, + &mac_stats->eth_mac.FrameCheckSequenceErrors); + fbnic_set_counter(ð_mac_stats->AlignmentErrors, + &mac_stats->eth_mac.AlignmentErrors); + fbnic_set_counter(ð_mac_stats->OctetsTransmittedOK, + &mac_stats->eth_mac.OctetsTransmittedOK); + fbnic_set_counter(ð_mac_stats->FramesLostDueToIntMACXmitError, + &mac_stats->eth_mac.FramesLostDueToIntMACXmitError); + fbnic_set_counter(ð_mac_stats->OctetsReceivedOK, + &mac_stats->eth_mac.OctetsReceivedOK); + fbnic_set_counter(ð_mac_stats->FramesLostDueToIntMACRcvError, + &mac_stats->eth_mac.FramesLostDueToIntMACRcvError); + fbnic_set_counter(ð_mac_stats->MulticastFramesXmittedOK, + &mac_stats->eth_mac.MulticastFramesXmittedOK); + fbnic_set_counter(ð_mac_stats->BroadcastFramesXmittedOK, + &mac_stats->eth_mac.BroadcastFramesXmittedOK); + fbnic_set_counter(ð_mac_stats->MulticastFramesReceivedOK, + &mac_stats->eth_mac.MulticastFramesReceivedOK); + fbnic_set_counter(ð_mac_stats->BroadcastFramesReceivedOK, + &mac_stats->eth_mac.BroadcastFramesReceivedOK); + fbnic_set_counter(ð_mac_stats->FrameTooLongErrors, + &mac_stats->eth_mac.FrameTooLongErrors); +} + +static const struct ethtool_ops fbnic_ethtool_ops = { + .get_drvinfo = fbnic_get_drvinfo, + .get_eth_mac_stats = fbnic_get_eth_mac_stats, +}; + +void fbnic_set_ethtool_ops(struct net_device *dev) +{ + dev->ethtool_ops = &fbnic_ethtool_ops; +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 0c6e1b4c119b..8f7a2a19ddf8 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -789,3 +789,16 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) count += (tx_mbx->head - head) % FBNIC_IPC_MBX_DESC_LEN; } while (count < FBNIC_IPC_MBX_DESC_LEN && --attempts); } + +void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, + const size_t str_sz) +{ + struct fbnic_fw_ver *mgmt = &fbd->fw_cap.running.mgmt; + const char *delim = ""; + + if (mgmt->commit[0]) + delim = "_"; + + fbnic_mk_full_fw_ver_str(mgmt->version, delim, mgmt->commit, + fw_version, str_sz); +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index c65bca613665..221faf8c6756 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -53,10 +53,10 @@ int 
fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership); int fbnic_fw_init_heartbeat(struct fbnic_dev *fbd, bool poll); void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd); -#define fbnic_mk_full_fw_ver_str(_rev_id, _delim, _commit, _str) \ +#define fbnic_mk_full_fw_ver_str(_rev_id, _delim, _commit, _str, _str_sz) \ do { \ const u32 __rev_id = _rev_id; \ - snprintf(_str, sizeof(_str), "%02lu.%02lu.%02lu-%03lu%s%s", \ + snprintf(_str, _str_sz, "%02lu.%02lu.%02lu-%03lu%s%s", \ FIELD_GET(FBNIC_FW_CAP_RESP_VERSION_MAJOR, __rev_id), \ FIELD_GET(FBNIC_FW_CAP_RESP_VERSION_MINOR, __rev_id), \ FIELD_GET(FBNIC_FW_CAP_RESP_VERSION_PATCH, __rev_id), \ @@ -65,7 +65,7 @@ do { \ } while (0) #define fbnic_mk_fw_ver_str(_rev_id, _str) \ - fbnic_mk_full_fw_ver_str(_rev_id, "", "", _str) + fbnic_mk_full_fw_ver_str(_rev_id, "", "", _str, sizeof(_str)) #define FW_HEARTBEAT_PERIOD (10 * HZ) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c new file mode 100644 index 000000000000..a0acc7606aa1 --- /dev/null +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c @@ -0,0 +1,27 @@ +#include "fbnic.h" + +u64 fbnic_stat_rd64(struct fbnic_dev *fbd, u32 reg, u32 offset) +{ + u32 prev_upper, upper, lower, diff; + + prev_upper = rd32(fbd, reg + offset); + lower = rd32(fbd, reg); + upper = rd32(fbd, reg + offset); + + diff = upper - prev_upper; + if (!diff) + return ((u64)upper << 32) | lower; + + if (diff > 1) + dev_warn_once(fbd->dev, + "Stats inconsistent, upper 32b of %#010x updating too quickly\n", + reg * 4); + + /* Return only the upper bits as we cannot guarantee + * the accuracy of the lower bits. We will add them in + * when the counter slows down enough that we can get + * a snapshot with both upper values being the same + * between reads. 
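+	 *
+	 * Example: a first upper read of 0x1, a lower read of 0xffffffff
+	 * and a second upper read of 0x2 mean the counter wrapped between
+	 * the reads, so only 0x2_00000000 can be reported safely.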
+ */ + return ((u64)upper << 32); +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h new file mode 100644 index 000000000000..30348904b510 --- /dev/null +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h @@ -0,0 +1,40 @@ +#include <linux/ethtool.h> + +#include "fbnic_csr.h" + +struct fbnic_stat_counter { + u64 value; + union { + u32 old_reg_value_32; + u64 old_reg_value_64; + } u; + bool reported; +}; + +struct fbnic_eth_mac_stats { + struct fbnic_stat_counter FramesTransmittedOK; + struct fbnic_stat_counter FramesReceivedOK; + struct fbnic_stat_counter FrameCheckSequenceErrors; + struct fbnic_stat_counter AlignmentErrors; + struct fbnic_stat_counter OctetsTransmittedOK; + struct fbnic_stat_counter FramesLostDueToIntMACXmitError; + struct fbnic_stat_counter OctetsReceivedOK; + struct fbnic_stat_counter FramesLostDueToIntMACRcvError; + struct fbnic_stat_counter MulticastFramesXmittedOK; + struct fbnic_stat_counter BroadcastFramesXmittedOK; + struct fbnic_stat_counter MulticastFramesReceivedOK; + struct fbnic_stat_counter BroadcastFramesReceivedOK; + struct fbnic_stat_counter FrameTooLongErrors; +}; + +struct fbnic_mac_stats { + struct fbnic_eth_mac_stats eth_mac; +}; + +struct fbnic_hw_stats { + struct fbnic_mac_stats mac; +}; + +u64 fbnic_stat_rd64(struct fbnic_dev *fbd, u32 reg, u32 offset); + +void fbnic_get_hw_stats(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 7920e7af82d9..7b654d0a6dac 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -403,6 +403,21 @@ static void fbnic_mac_init_regs(struct fbnic_dev *fbd) fbnic_mac_init_txb(fbd); } +static void __fbnic_mac_stat_rd64(struct fbnic_dev *fbd, bool reset, u32 reg, + struct fbnic_stat_counter *stat) +{ + u64 new_reg_value; + + new_reg_value = fbnic_stat_rd64(fbd, reg, 1); + if (!reset) + stat->value += new_reg_value - stat->u.old_reg_value_64; + stat->u.old_reg_value_64 = new_reg_value; + stat->reported = true; +} + +#define fbnic_mac_stat_rd64(fbd, reset, __stat, __CSR) \ + __fbnic_mac_stat_rd64(fbd, reset, FBNIC_##__CSR##_L, &(__stat)) + static void fbnic_mac_tx_pause_config(struct fbnic_dev *fbd, bool tx_pause) { u32 rxb_pause_ctrl; @@ -637,12 +652,47 @@ static void fbnic_mac_link_up_asic(struct fbnic_dev *fbd, wr32(fbd, FBNIC_MAC_COMMAND_CONFIG, cmd_cfg); } +static void +fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset, + struct fbnic_eth_mac_stats *mac_stats) +{ + fbnic_mac_stat_rd64(fbd, reset, mac_stats->OctetsReceivedOK, + MAC_STAT_RX_BYTE_COUNT); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->AlignmentErrors, + MAC_STAT_RX_ALIGN_ERROR); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->FrameTooLongErrors, + MAC_STAT_RX_TOOLONG); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->FramesReceivedOK, + MAC_STAT_RX_RECEIVED_OK); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->FrameCheckSequenceErrors, + MAC_STAT_RX_PACKET_BAD_FCS); + fbnic_mac_stat_rd64(fbd, reset, + mac_stats->FramesLostDueToIntMACRcvError, + MAC_STAT_RX_IFINERRORS); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->MulticastFramesReceivedOK, + MAC_STAT_RX_MULTICAST); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->BroadcastFramesReceivedOK, + MAC_STAT_RX_BROADCAST); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->OctetsTransmittedOK, + MAC_STAT_TX_BYTE_COUNT); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->FramesTransmittedOK, + MAC_STAT_TX_TRANSMITTED_OK); + 
fbnic_mac_stat_rd64(fbd, reset, + mac_stats->FramesLostDueToIntMACXmitError, + MAC_STAT_TX_IFOUTERRORS); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->MulticastFramesXmittedOK, + MAC_STAT_TX_MULTICAST); + fbnic_mac_stat_rd64(fbd, reset, mac_stats->BroadcastFramesXmittedOK, + MAC_STAT_TX_BROADCAST); +} + static const struct fbnic_mac fbnic_mac_asic = { .init_regs = fbnic_mac_init_regs, .pcs_enable = fbnic_pcs_enable_asic, .pcs_disable = fbnic_pcs_disable_asic, .pcs_get_link = fbnic_pcs_get_link_asic, .pcs_get_link_event = fbnic_pcs_get_link_event_asic, + .get_eth_mac_stats = fbnic_mac_get_eth_mac_stats, .link_down = fbnic_mac_link_down_asic, .link_up = fbnic_mac_link_up_asic, }; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h index f53be6e6aef9..476239a9d381 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h @@ -78,6 +78,9 @@ struct fbnic_mac { bool (*pcs_get_link)(struct fbnic_dev *fbd); int (*pcs_get_link_event)(struct fbnic_dev *fbd); + void (*get_eth_mac_stats)(struct fbnic_dev *fbd, bool reset, + struct fbnic_eth_mac_stats *mac_stats); + void (*link_down)(struct fbnic_dev *fbd); void (*link_up)(struct fbnic_dev *fbd, bool tx_pause, bool rx_pause); }; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index b7ce6da68543..a400616a24d4 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -4,6 +4,7 @@ #include <linux/etherdevice.h> #include <linux/ipv6.h> #include <linux/types.h> +#include <net/netdev_queues.h> #include "fbnic.h" #include "fbnic_netdev.h" @@ -316,6 +317,74 @@ void fbnic_clear_rx_mode(struct net_device *netdev) __dev_mc_unsync(netdev, NULL); } +static void fbnic_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats64) +{ + u64 tx_bytes, tx_packets, tx_dropped = 0; + u64 rx_bytes, rx_packets, rx_dropped = 0; + struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_queue_stats *stats; + unsigned int start, i; + + stats = &fbn->tx_stats; + + tx_bytes = stats->bytes; + tx_packets = stats->packets; + tx_dropped = stats->dropped; + + stats64->tx_bytes = tx_bytes; + stats64->tx_packets = tx_packets; + stats64->tx_dropped = tx_dropped; + + for (i = 0; i < fbn->num_tx_queues; i++) { + struct fbnic_ring *txr = fbn->tx[i]; + + if (!txr) + continue; + + stats = &txr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + tx_bytes = stats->bytes; + tx_packets = stats->packets; + tx_dropped = stats->dropped; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + stats64->tx_bytes += tx_bytes; + stats64->tx_packets += tx_packets; + stats64->tx_dropped += tx_dropped; + } + + stats = &fbn->rx_stats; + + rx_bytes = stats->bytes; + rx_packets = stats->packets; + rx_dropped = stats->dropped; + + stats64->rx_bytes = rx_bytes; + stats64->rx_packets = rx_packets; + stats64->rx_dropped = rx_dropped; + + for (i = 0; i < fbn->num_rx_queues; i++) { + struct fbnic_ring *rxr = fbn->rx[i]; + + if (!rxr) + continue; + + stats = &rxr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + rx_bytes = stats->bytes; + rx_packets = stats->packets; + rx_dropped = stats->dropped; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + stats64->rx_bytes += rx_bytes; + stats64->rx_packets += rx_packets; + stats64->rx_dropped += rx_dropped; + } +} + static const struct net_device_ops fbnic_netdev_ops = { .ndo_open = fbnic_open, .ndo_stop = 
fbnic_stop, @@ -324,6 +393,72 @@ static const struct net_device_ops fbnic_netdev_ops = { .ndo_features_check = fbnic_features_check, .ndo_set_mac_address = fbnic_set_mac, .ndo_set_rx_mode = fbnic_set_rx_mode, + .ndo_get_stats64 = fbnic_get_stats64, +}; + +static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_ring *rxr = fbn->rx[idx]; + struct fbnic_queue_stats *stats; + unsigned int start; + u64 bytes, packets; + + if (!rxr) + return; + + stats = &rxr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + bytes = stats->bytes; + packets = stats->packets; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + rx->bytes = bytes; + rx->packets = packets; +} + +static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *tx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_ring *txr = fbn->tx[idx]; + struct fbnic_queue_stats *stats; + unsigned int start; + u64 bytes, packets; + + if (!txr) + return; + + stats = &txr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + bytes = stats->bytes; + packets = stats->packets; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + tx->bytes = bytes; + tx->packets = packets; +} + +static void fbnic_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + + tx->bytes = fbn->tx_stats.bytes; + tx->packets = fbn->tx_stats.packets; + + rx->bytes = fbn->rx_stats.bytes; + rx->packets = fbn->rx_stats.packets; +} + +static const struct netdev_stat_ops fbnic_stat_ops = { + .get_queue_stats_rx = fbnic_get_queue_stats_rx, + .get_queue_stats_tx = fbnic_get_queue_stats_tx, + .get_base_stats = fbnic_get_base_stats, }; void fbnic_reset_queues(struct fbnic_net *fbn, @@ -384,6 +519,9 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) fbd->netdev = netdev; netdev->netdev_ops = &fbnic_netdev_ops; + netdev->stat_ops = &fbnic_stat_ops; + + fbnic_set_ethtool_ops(netdev); fbn = netdev_priv(netdev); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index 6bc0ebeb8182..6c27da09a612 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -40,6 +40,9 @@ struct fbnic_net { u32 rss_key[FBNIC_RPC_RSS_KEY_DWORD_LEN]; u32 rss_flow_hash[FBNIC_NUM_HASH_OPT]; + /* Storage for stats after ring destruction */ + struct fbnic_queue_stats tx_stats; + struct fbnic_queue_stats rx_stats; u64 link_down_events; struct list_head napis; @@ -55,6 +58,7 @@ int fbnic_netdev_register(struct net_device *netdev); void fbnic_netdev_unregister(struct net_device *netdev); void fbnic_reset_queues(struct fbnic_net *fbn, unsigned int tx, unsigned int rx); +void fbnic_set_ethtool_ops(struct net_device *dev); void __fbnic_set_rx_mode(struct net_device *netdev); void fbnic_clear_rx_mode(struct net_device *netdev); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 0ed4c9fff5d8..6a6d7e22f1a7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -273,6 +273,9 @@ fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring) err_free: dev_kfree_skb_any(skb); err_count: + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped++; + 
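+	/* stats.syncp is the writer side of a u64_stats seqcount; readers
+	 * such as fbnic_get_stats64() retry with u64_stats_fetch_begin()
+	 * and u64_stats_fetch_retry() until the snapshot is consistent.
+	 */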
u64_stats_update_end(&ring->stats.syncp); return NETDEV_TX_OK; } @@ -363,10 +366,19 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, txq = txring_txq(nv->napi.dev, ring); if (unlikely(discard)) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped += total_packets; + u64_stats_update_end(&ring->stats.syncp); + netdev_tx_completed_queue(txq, total_packets, total_bytes); return; } + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.bytes += total_bytes; + ring->stats.packets += total_packets; + u64_stats_update_end(&ring->stats.syncp); + netif_txq_completed_wake(txq, total_packets, total_bytes, fbnic_desc_unused(ring), FBNIC_TX_DESC_WAKEUP); @@ -730,12 +742,12 @@ static bool fbnic_rcd_metadata_err(u64 rcd) static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int budget) { + unsigned int packets = 0, bytes = 0, dropped = 0; struct fbnic_ring *rcq = &qt->cmpl; struct fbnic_pkt_buff *pkt; s32 head0 = -1, head1 = -1; __le64 *raw_rcd, done; u32 head = rcq->head; - u64 packets = 0; done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0; raw_rcd = &rcq->desc[head & rcq->size_mask]; @@ -780,9 +792,11 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, fbnic_populate_skb_fields(nv, rcd, skb, qt); packets++; + bytes += skb->len; napi_gro_receive(&nv->napi, skb); } else { + dropped++; fbnic_put_pkt_buff(nv, pkt, 1); } @@ -799,6 +813,14 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, } } + u64_stats_update_begin(&rcq->stats.syncp); + rcq->stats.packets += packets; + rcq->stats.bytes += bytes; + /* Re-add Ethernet header length (removed in fbnic_build_skb) */ + rcq->stats.bytes += ETH_HLEN * packets; + rcq->stats.dropped += dropped; + u64_stats_update_end(&rcq->stats.syncp); + /* Unmap and free processed buffers */ if (head0 >= 0) fbnic_clean_bdq(nv, budget, &qt->sub0, head0); @@ -865,12 +887,36 @@ static irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data) return IRQ_HANDLED; } +static void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, + struct fbnic_ring *rxr) +{ + struct fbnic_queue_stats *stats = &rxr->stats; + + /* Capture stats from queues before disassociating them */ + fbn->rx_stats.bytes += stats->bytes; + fbn->rx_stats.packets += stats->packets; + fbn->rx_stats.dropped += stats->dropped; +} + +static void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, + struct fbnic_ring *txr) +{ + struct fbnic_queue_stats *stats = &txr->stats; + + /* Capture stats from queues before disassociating them */ + fbn->tx_stats.bytes += stats->bytes; + fbn->tx_stats.packets += stats->packets; + fbn->tx_stats.dropped += stats->dropped; +} + static void fbnic_remove_tx_ring(struct fbnic_net *fbn, struct fbnic_ring *txr) { if (!(txr->flags & FBNIC_RING_F_STATS)) return; + fbnic_aggregate_ring_tx_counters(fbn, txr); + /* Remove pointer to the Tx ring */ WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr); fbn->tx[txr->q_idx] = NULL; @@ -882,6 +928,8 @@ static void fbnic_remove_rx_ring(struct fbnic_net *fbn, struct fbnic_ring *rxr) { if (!(rxr->flags & FBNIC_RING_F_STATS)) return; + fbnic_aggregate_ring_rx_counters(fbn, rxr); + /* Remove pointer to the Rx ring */ WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr); fbn->rx[rxr->q_idx] = NULL; @@ -974,6 +1022,7 @@ static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, int q_idx, u8 flags) { + u64_stats_init(&ring->stats.syncp); ring->doorbell =
doorbell; ring->q_idx = q_idx; ring->flags = flags; @@ -1012,14 +1061,14 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, nv->fbd = fbd; nv->v_idx = v_idx; - /* Record IRQ to NAPI struct */ - netif_napi_set_irq(&nv->napi, - pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); - /* Tie napi to netdev */ list_add(&nv->napis, &fbn->napis); netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll); + /* Record IRQ to NAPI struct */ + netif_napi_set_irq(&nv->napi, + pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); + /* Tie nv back to PCIe dev */ nv->dev = fbd->dev; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 4a206c0e7192..2f91f68d11d5 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -7,6 +7,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/types.h> +#include <linux/u64_stats_sync.h> #include <net/xdp.h> struct fbnic_net; @@ -51,6 +52,13 @@ struct fbnic_pkt_buff { u16 nr_frags; }; +struct fbnic_queue_stats { + u64 packets; + u64 bytes; + u64 dropped; + struct u64_stats_sync syncp; +}; + /* Pagecnt bias is long max to reserve the last bit to catch overflow * cases where if we overcharge the bias it will flip over to be negative. */ @@ -77,6 +85,8 @@ struct fbnic_ring { u32 head, tail; /* Head/Tail of ring */ + struct fbnic_queue_stats stats; + /* Slow path fields follow */ dma_addr_t dma; /* Phys addr of descriptor memory */ size_t size; /* Size of descriptor ring in memory */ diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index 43ba71e82260..ee046468652c 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -46,18 +46,21 @@ config LAN743X tristate "LAN743x support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL - select PHYLIB select FIXED_PHY select CRC16 select CRC32 + select PHYLINK help - Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip + Support for the Microchip LAN743x and PCI11x1x families of PCI + Express Ethernet devices To compile this driver as a module, choose M here. The module will be called lan743x. +source "drivers/net/ethernet/microchip/lan865x/Kconfig" source "drivers/net/ethernet/microchip/lan966x/Kconfig" source "drivers/net/ethernet/microchip/sparx5/Kconfig" source "drivers/net/ethernet/microchip/vcap/Kconfig" +source "drivers/net/ethernet/microchip/fdma/Kconfig" endif # NET_VENDOR_MICROCHIP diff --git a/drivers/net/ethernet/microchip/Makefile b/drivers/net/ethernet/microchip/Makefile index bbd349264e6f..3c65baed9fd8 100644 --- a/drivers/net/ethernet/microchip/Makefile +++ b/drivers/net/ethernet/microchip/Makefile @@ -9,6 +9,8 @@ obj-$(CONFIG_LAN743X) += lan743x.o lan743x-objs := lan743x_main.o lan743x_ethtool.o lan743x_ptp.o +obj-$(CONFIG_LAN865X) += lan865x/ obj-$(CONFIG_LAN966X_SWITCH) += lan966x/ obj-$(CONFIG_SPARX5_SWITCH) += sparx5/ obj-$(CONFIG_VCAP) += vcap/ +obj-$(CONFIG_FDMA) += fdma/ diff --git a/drivers/net/ethernet/microchip/fdma/Kconfig b/drivers/net/ethernet/microchip/fdma/Kconfig new file mode 100644 index 000000000000..59159ad6701a --- /dev/null +++ b/drivers/net/ethernet/microchip/fdma/Kconfig @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Microchip FDMA API configuration +# + +if NET_VENDOR_MICROCHIP + +config FDMA + bool "FDMA API" + help + Provides the basic FDMA functionality for multiple Microchip + switchcores. 
+ + Say Y here if you want to build the FDMA API that provides a common + set of functions and data structures for interacting with the Frame + DMA engine in multiple Microchip switchcores. + +endif # NET_VENDOR_MICROCHIP diff --git a/drivers/net/ethernet/microchip/fdma/Makefile b/drivers/net/ethernet/microchip/fdma/Makefile new file mode 100644 index 000000000000..cc9a736be357 --- /dev/null +++ b/drivers/net/ethernet/microchip/fdma/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for Microchip FDMA +# + +obj-$(CONFIG_FDMA) += fdma.o +fdma-y += fdma_api.o diff --git a/drivers/net/ethernet/microchip/fdma/fdma_api.c b/drivers/net/ethernet/microchip/fdma/fdma_api.c new file mode 100644 index 000000000000..e78c3590da9e --- /dev/null +++ b/drivers/net/ethernet/microchip/fdma/fdma_api.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "fdma_api.h" + +#include <linux/bits.h> + +#include <linux/etherdevice.h> +#include <linux/types.h> + +/* Add a DB to a DCB, providing a callback for getting the DB dataptr. */ +static int __fdma_db_add(struct fdma *fdma, int dcb_idx, int db_idx, u64 status, + int (*cb)(struct fdma *fdma, int dcb_idx, + int db_idx, u64 *dataptr)) +{ + struct fdma_db *db = fdma_db_get(fdma, dcb_idx, db_idx); + + db->status = status; + + return cb(fdma, dcb_idx, db_idx, &db->dataptr); +} + +/* Add a DB to a DCB, using the callback set in the fdma_ops struct. */ +int fdma_db_add(struct fdma *fdma, int dcb_idx, int db_idx, u64 status) +{ + return __fdma_db_add(fdma, + dcb_idx, + db_idx, + status, + fdma->ops.dataptr_cb); +} + +/* Add a DCB with callbacks for getting the DB dataptr and the DCB nextptr. */ +int __fdma_dcb_add(struct fdma *fdma, int dcb_idx, u64 info, u64 status, + int (*dcb_cb)(struct fdma *fdma, int dcb_idx, u64 *nextptr), + int (*db_cb)(struct fdma *fdma, int dcb_idx, int db_idx, + u64 *dataptr)) +{ + struct fdma_dcb *dcb = fdma_dcb_get(fdma, dcb_idx); + int i, err; + + for (i = 0; i < fdma->n_dbs; i++) { + err = __fdma_db_add(fdma, dcb_idx, i, status, db_cb); + if (unlikely(err)) + return err; + } + + err = dcb_cb(fdma, dcb_idx, &fdma->last_dcb->nextptr); + if (unlikely(err)) + return err; + + fdma->last_dcb = dcb; + + dcb->nextptr = FDMA_DCB_INVALID_DATA; + dcb->info = info; + + return 0; +} +EXPORT_SYMBOL_GPL(__fdma_dcb_add); + +/* Add a DCB, using the preset callbacks in the fdma_ops struct. */ +int fdma_dcb_add(struct fdma *fdma, int dcb_idx, u64 info, u64 status) +{ + return __fdma_dcb_add(fdma, + dcb_idx, + info, status, + fdma->ops.nextptr_cb, + fdma->ops.dataptr_cb); +} +EXPORT_SYMBOL_GPL(fdma_dcb_add); + +/* Initialize the DCB's and DB's. */ +int fdma_dcbs_init(struct fdma *fdma, u64 info, u64 status) +{ + int i, err; + + fdma->last_dcb = fdma->dcbs; + fdma->db_index = 0; + fdma->dcb_index = 0; + + for (i = 0; i < fdma->n_dcbs; i++) { + err = fdma_dcb_add(fdma, i, info, status); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fdma_dcbs_init); + +/* Allocate coherent DMA memory for FDMA. */ +int fdma_alloc_coherent(struct device *dev, struct fdma *fdma) +{ + fdma->dcbs = dma_alloc_coherent(dev, + fdma->size, + &fdma->dma, + GFP_KERNEL); + if (!fdma->dcbs) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(fdma_alloc_coherent); + +/* Allocate physical memory for FDMA.
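+ * Unlike fdma_alloc_coherent() this allocates with kzalloc() and derives
+ * the DMA address with virt_to_phys(), so it only suits devices that can
+ * consume raw physical addresses.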
*/ +int fdma_alloc_phys(struct fdma *fdma) +{ + fdma->dcbs = kzalloc(fdma->size, GFP_KERNEL); + if (!fdma->dcbs) + return -ENOMEM; + + fdma->dma = virt_to_phys(fdma->dcbs); + + return 0; +} +EXPORT_SYMBOL_GPL(fdma_alloc_phys); + +/* Free coherent DMA memory. */ +void fdma_free_coherent(struct device *dev, struct fdma *fdma) +{ + dma_free_coherent(dev, fdma->size, fdma->dcbs, fdma->dma); +} +EXPORT_SYMBOL_GPL(fdma_free_coherent); + +/* Free virtual memory. */ +void fdma_free_phys(struct fdma *fdma) +{ + kfree(fdma->dcbs); +} +EXPORT_SYMBOL_GPL(fdma_free_phys); + +/* Get the size of the FDMA DCB memory. */ +u32 fdma_get_size(struct fdma *fdma) +{ + return ALIGN(sizeof(struct fdma_dcb) * fdma->n_dcbs, PAGE_SIZE); +} +EXPORT_SYMBOL_GPL(fdma_get_size); + +/* Get the size of the FDMA memory. This function is only applicable if the + * dataptr addresses and DCB's are in contiguous memory. + */ +u32 fdma_get_size_contiguous(struct fdma *fdma) +{ + return ALIGN(fdma->n_dcbs * sizeof(struct fdma_dcb) + + fdma->n_dcbs * fdma->n_dbs * fdma->db_size, + PAGE_SIZE); +} +EXPORT_SYMBOL_GPL(fdma_get_size_contiguous); diff --git a/drivers/net/ethernet/microchip/fdma/fdma_api.h b/drivers/net/ethernet/microchip/fdma/fdma_api.h new file mode 100644 index 000000000000..d91affe8bd98 --- /dev/null +++ b/drivers/net/ethernet/microchip/fdma/fdma_api.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _FDMA_API_H_ +#define _FDMA_API_H_ + +#include <linux/bits.h> +#include <linux/etherdevice.h> +#include <linux/types.h> + +/* This provides a common set of functions and data structures for interacting + * with the Frame DMA engine on multiple Microchip switchcores. + * + * Frame DMA DCB format: + * + * +---------------------------+ + * | Next Ptr | + * +---------------------------+ + * | Reserved | Info | + * +---------------------------+ + * | Data0 Ptr | + * +---------------------------+ + * | Reserved | Status0 | + * +---------------------------+ + * | Data1 Ptr | + * +---------------------------+ + * | Reserved | Status1 | + * +---------------------------+ + * | Data2 Ptr | + * +---------------------------+ + * | Reserved | Status2 | + * |-------------|-------------| + * | | + * | | + * | | + * | | + * | | + * |---------------------------| + * | Data14 Ptr | + * +-------------|-------------+ + * | Reserved | Status14 | + * +-------------|-------------+ + * + * The data pointers point to the actual frame data to be received or sent. The + * addresses of the data pointers can, as of writing, be either a DMA address, + * a physical address or a mapped address.
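+ *
+ * A minimal setup sketch (illustrative only: "priv", "dev" and
+ * my_dataptr_cb() are hypothetical driver pieces; the fdma_* calls are the
+ * ones declared below):
+ *
+ *	fdma->priv = priv;
+ *	fdma->n_dcbs = 64;
+ *	fdma->n_dbs = 1;
+ *	fdma->db_size = PAGE_SIZE;
+ *	fdma->ops.dataptr_cb = &my_dataptr_cb;
+ *	fdma->ops.nextptr_cb = &fdma_nextptr_cb;
+ *	fdma->size = fdma_get_size(fdma);
+ *
+ *	err = fdma_alloc_coherent(dev, fdma);
+ *	if (!err)
+ *		err = fdma_dcbs_init(fdma,
+ *				     FDMA_DCB_INFO_DATAL(fdma->db_size),
+ *				     FDMA_DCB_STATUS_INTR);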
+ * + */ + +#define FDMA_DCB_INFO_DATAL(x) ((x) & GENMASK(15, 0)) +#define FDMA_DCB_INFO_TOKEN BIT(17) +#define FDMA_DCB_INFO_INTR BIT(18) +#define FDMA_DCB_INFO_SW(x) (((x) << 24) & GENMASK(31, 24)) + +#define FDMA_DCB_STATUS_BLOCKL(x) ((x) & GENMASK(15, 0)) +#define FDMA_DCB_STATUS_SOF BIT(16) +#define FDMA_DCB_STATUS_EOF BIT(17) +#define FDMA_DCB_STATUS_INTR BIT(18) +#define FDMA_DCB_STATUS_DONE BIT(19) +#define FDMA_DCB_STATUS_BLOCKO(x) (((x) << 20) & GENMASK(31, 20)) +#define FDMA_DCB_INVALID_DATA 0x1 + +#define FDMA_DB_MAX 15 /* Max number of DB's on Sparx5 */ + +struct fdma; + +struct fdma_db { + u64 dataptr; + u64 status; +}; + +struct fdma_dcb { + u64 nextptr; + u64 info; + struct fdma_db db[FDMA_DB_MAX]; +}; + +struct fdma_ops { + /* User-provided callback to set the dataptr */ + int (*dataptr_cb)(struct fdma *fdma, int dcb_idx, int db_idx, u64 *ptr); + /* User-provided callback to set the nextptr */ + int (*nextptr_cb)(struct fdma *fdma, int dcb_idx, u64 *ptr); +}; + +struct fdma { + void *priv; + + /* Virtual addresses */ + struct fdma_dcb *dcbs; + struct fdma_dcb *last_dcb; + + /* DMA address */ + dma_addr_t dma; + + /* Size of DCB + DB memory */ + int size; + + /* Indexes used to access the next-to-be-used DCB or DB */ + int db_index; + int dcb_index; + + /* Number of DCB's and DB's */ + u32 n_dcbs; + u32 n_dbs; + + /* Size of DB's */ + u32 db_size; + + /* Channel id this FDMA object operates on */ + u32 channel_id; + + struct fdma_ops ops; +}; + +/* Advance the DCB index and wrap if required. */ +static inline void fdma_dcb_advance(struct fdma *fdma) +{ + fdma->dcb_index++; + if (fdma->dcb_index >= fdma->n_dcbs) + fdma->dcb_index = 0; +} + +/* Advance the DB index. */ +static inline void fdma_db_advance(struct fdma *fdma) +{ + fdma->db_index++; +} + +/* Reset the db index to zero. */ +static inline void fdma_db_reset(struct fdma *fdma) +{ + fdma->db_index = 0; +} + +/* Check if a DCB can be reused in case of multiple DB's per DCB. */ +static inline bool fdma_dcb_is_reusable(struct fdma *fdma) +{ + return fdma->db_index != fdma->n_dbs; +} + +/* Check if the FDMA has marked this DB as done. */ +static inline bool fdma_db_is_done(struct fdma_db *db) +{ + return db->status & FDMA_DCB_STATUS_DONE; +} + +/* Get the length of a DB. */ +static inline int fdma_db_len_get(struct fdma_db *db) +{ + return FDMA_DCB_STATUS_BLOCKL(db->status); +} + +/* Set the length of a DB. */ +static inline void fdma_dcb_len_set(struct fdma_dcb *dcb, u32 len) +{ + dcb->info = FDMA_DCB_INFO_DATAL(len); +} + +/* Get a DB by index. */ +static inline struct fdma_db *fdma_db_get(struct fdma *fdma, int dcb_idx, + int db_idx) +{ + return &fdma->dcbs[dcb_idx].db[db_idx]; +} + +/* Get the next DB. */ +static inline struct fdma_db *fdma_db_next_get(struct fdma *fdma) +{ + return fdma_db_get(fdma, fdma->dcb_index, fdma->db_index); +} + +/* Get a DCB by index. */ +static inline struct fdma_dcb *fdma_dcb_get(struct fdma *fdma, int dcb_idx) +{ + return &fdma->dcbs[dcb_idx]; +} + +/* Get the next DCB. */ +static inline struct fdma_dcb *fdma_dcb_next_get(struct fdma *fdma) +{ + return fdma_dcb_get(fdma, fdma->dcb_index); +} + +/* Check if the FDMA has frames ready for extraction. 
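+ *
+ * A typical extraction loop built from the helpers above (a sketch;
+ * my_consume_db() stands in for the driver's frame handling):
+ *
+ *	while (fdma_has_frames(fdma)) {
+ *		my_consume_db(fdma_db_next_get(fdma));
+ *		fdma_db_advance(fdma);
+ *		if (fdma_dcb_is_reusable(fdma))
+ *			continue;
+ *		fdma_dcb_add(fdma, fdma->dcb_index,
+ *			     FDMA_DCB_INFO_DATAL(fdma->db_size),
+ *			     FDMA_DCB_STATUS_INTR);
+ *		fdma_db_reset(fdma);
+ *		fdma_dcb_advance(fdma);
+ *	}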
*/ +static inline bool fdma_has_frames(struct fdma *fdma) +{ + return fdma_db_is_done(fdma_db_next_get(fdma)); +} + +/* Get a nextptr by index */ +static inline int fdma_nextptr_cb(struct fdma *fdma, int dcb_idx, u64 *nextptr) +{ + *nextptr = fdma->dma + (sizeof(struct fdma_dcb) * dcb_idx); + return 0; +} + +/* Get the DMA address of a dataptr, by index. This function is only applicable + * if the dataptr addresses and DCB's are in contiguous memory and the driver + * supports XDP. + */ +static inline u64 fdma_dataptr_get_contiguous(struct fdma *fdma, int dcb_idx, + int db_idx) +{ + return fdma->dma + (sizeof(struct fdma_dcb) * fdma->n_dcbs) + + (dcb_idx * fdma->n_dbs + db_idx) * fdma->db_size + + XDP_PACKET_HEADROOM; +} + +/* Get the virtual address of a dataptr, by index. This function is only + * applicable if the dataptr addresses and DCB's are in contiguous memory and + * the driver supports XDP. + */ +static inline void *fdma_dataptr_virt_get_contiguous(struct fdma *fdma, + int dcb_idx, int db_idx) +{ + return (u8 *)fdma->dcbs + (sizeof(struct fdma_dcb) * fdma->n_dcbs) + + (dcb_idx * fdma->n_dbs + db_idx) * fdma->db_size + + XDP_PACKET_HEADROOM; +} + +/* Check if this DCB is the last used DCB. */ +static inline bool fdma_is_last(struct fdma *fdma, struct fdma_dcb *dcb) +{ + return dcb == fdma->last_dcb; +} + +int fdma_dcbs_init(struct fdma *fdma, u64 info, u64 status); +int fdma_db_add(struct fdma *fdma, int dcb_idx, int db_idx, u64 status); +int fdma_dcb_add(struct fdma *fdma, int dcb_idx, u64 info, u64 status); +int __fdma_dcb_add(struct fdma *fdma, int dcb_idx, u64 info, u64 status, + int (*dcb_cb)(struct fdma *fdma, int dcb_idx, u64 *nextptr), + int (*db_cb)(struct fdma *fdma, int dcb_idx, int db_idx, + u64 *dataptr)); + +int fdma_alloc_coherent(struct device *dev, struct fdma *fdma); +int fdma_alloc_phys(struct fdma *fdma); + +void fdma_free_coherent(struct device *dev, struct fdma *fdma); +void fdma_free_phys(struct fdma *fdma); + +u32 fdma_get_size(struct fdma *fdma); +u32 fdma_get_size_contiguous(struct fdma *fdma); + +#endif diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 3a63ec091413..1a1cbd034eda 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1034,16 +1034,12 @@ static int lan743x_ethtool_get_ts_info(struct net_device *netdev, struct lan743x_adapter *adapter = netdev_priv(netdev); ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (adapter->ptp.ptp_clock) ts_info->phc_index = ptp_clock_index(adapter->ptp.ptp_clock); - else - ts_info->phc_index = -1; ts_info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | @@ -1058,61 +1054,55 @@ static int lan743x_ethtool_get_eee(struct net_device *netdev, struct ethtool_keee *eee) { struct lan743x_adapter *adapter = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; - u32 buf; - int ret; - - if (!phydev) - return -EIO; - if (!phydev->drv) { - netif_err(adapter, drv, adapter->netdev, - "Missing PHY Driver\n"); - return -EIO; - } - ret = phy_ethtool_get_eee(phydev, eee); - if (ret < 0) - return ret; + eee->tx_lpi_timer = lan743x_csr_read(adapter, + MAC_EEE_TX_LPI_REQ_DLY_CNT); - buf = lan743x_csr_read(adapter, MAC_CR); - if (buf & MAC_CR_EEE_EN_) { - /* EEE_TX_LPI_REQ_DLY & tx_lpi_timer are same uSec unit */ - 
buf = lan743x_csr_read(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT); - eee->tx_lpi_timer = buf; - } else { - eee->tx_lpi_timer = 0; - } - - return 0; + return phylink_ethtool_get_eee(adapter->phylink, eee); } static int lan743x_ethtool_set_eee(struct net_device *netdev, struct ethtool_keee *eee) { - struct lan743x_adapter *adapter; - struct phy_device *phydev; - u32 buf = 0; + struct lan743x_adapter *adapter = netdev_priv(netdev); + u32 tx_lpi_timer; - if (!netdev) - return -EINVAL; - adapter = netdev_priv(netdev); - if (!adapter) - return -EINVAL; - phydev = netdev->phydev; - if (!phydev) - return -EIO; - if (!phydev->drv) { - netif_err(adapter, drv, adapter->netdev, - "Missing PHY Driver\n"); - return -EIO; - } + tx_lpi_timer = lan743x_csr_read(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT); + if (tx_lpi_timer != eee->tx_lpi_timer) { + u32 mac_cr = lan743x_csr_read(adapter, MAC_CR); + + /* Software should only change this field when Energy Efficient + * Ethernet Enable (EEEEN) is cleared. + * This function will trigger an autonegotiation restart and + * eee will be reenabled during link up if eee was negotiated. + */ + lan743x_mac_eee_enable(adapter, false); + lan743x_csr_write(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT, + eee->tx_lpi_timer); - if (eee->eee_enabled) { - buf = (u32)eee->tx_lpi_timer; - lan743x_csr_write(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT, buf); + if (mac_cr & MAC_CR_EEE_EN_) + lan743x_mac_eee_enable(adapter, true); } - return phy_ethtool_set_eee(phydev, eee); + return phylink_ethtool_set_eee(adapter->phylink, eee); +} + +static int +lan743x_ethtool_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct lan743x_adapter *adapter = netdev_priv(netdev); + + return phylink_ethtool_ksettings_set(adapter->phylink, cmd); +} + +static int +lan743x_ethtool_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct lan743x_adapter *adapter = netdev_priv(netdev); + + return phylink_ethtool_ksettings_get(adapter->phylink, cmd); } #ifdef CONFIG_PM @@ -1124,8 +1114,7 @@ static void lan743x_ethtool_get_wol(struct net_device *netdev, wol->supported = 0; wol->wolopts = 0; - if (netdev->phydev) - phy_ethtool_get_wol(netdev->phydev, wol); + phylink_ethtool_get_wol(adapter->phylink, wol); if (wol->supported != adapter->phy_wol_supported) netif_warn(adapter, drv, adapter->netdev, @@ -1166,7 +1155,7 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, !(adapter->phy_wol_supported & WAKE_MAGICSECURE)) phy_wol.wolopts &= ~WAKE_MAGIC; - ret = phy_ethtool_set_wol(netdev->phydev, &phy_wol); + ret = phylink_ethtool_set_wol(adapter->phylink, wol); if (ret && (ret != -EOPNOTSUPP)) return ret; @@ -1355,44 +1344,16 @@ static void lan743x_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct lan743x_adapter *adapter = netdev_priv(dev); - struct lan743x_phy *phy = &adapter->phy; - if (phy->fc_request_control & FLOW_CTRL_TX) - pause->tx_pause = 1; - if (phy->fc_request_control & FLOW_CTRL_RX) - pause->rx_pause = 1; - pause->autoneg = phy->fc_autoneg; + phylink_ethtool_get_pauseparam(adapter->phylink, pause); } static int lan743x_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct lan743x_adapter *adapter = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - struct lan743x_phy *phy = &adapter->phy; - - if (!phydev) - return -ENODEV; - - if (!phy_validate_pause(phydev, pause)) - return -EINVAL; - - phy->fc_request_control = 0; - if (pause->rx_pause) - 
phy->fc_request_control |= FLOW_CTRL_RX; - if (pause->tx_pause) - phy->fc_request_control |= FLOW_CTRL_TX; - - phy->fc_autoneg = pause->autoneg; - - if (pause->autoneg == AUTONEG_DISABLE) - lan743x_mac_flow_ctrl_set_enables(adapter, pause->tx_pause, - pause->rx_pause); - else - phy_set_asym_pause(phydev, pause->rx_pause, pause->tx_pause); - - return 0; + return phylink_ethtool_set_pauseparam(adapter->phylink, pause); } const struct ethtool_ops lan743x_ethtool_ops = { @@ -1417,8 +1378,8 @@ const struct ethtool_ops lan743x_ethtool_ops = { .get_ts_info = lan743x_ethtool_get_ts_info, .get_eee = lan743x_ethtool_get_eee, .set_eee = lan743x_ethtool_set_eee, - .get_link_ksettings = phy_ethtool_get_link_ksettings, - .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_link_ksettings = lan743x_ethtool_get_link_ksettings, + .set_link_ksettings = lan743x_ethtool_set_link_ksettings, .get_regs_len = lan743x_get_regs_len, .get_regs = lan743x_get_regs, .get_pauseparam = lan743x_get_pauseparam, diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e418539565b1..4dc5adcda6a3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include <linux/iopoll.h> #include <linux/crc16.h> +#include <linux/phylink.h> #include "lan743x_main.h" #include "lan743x_ethtool.h" @@ -992,6 +993,42 @@ static int lan743x_sgmii_write(struct lan743x_adapter *adapter, return ret; } +static int lan743x_get_lsd(int speed, int duplex, u8 mss) +{ + int lsd; + + switch (speed) { + case SPEED_2500: + if (mss == MASTER_SLAVE_STATE_SLAVE) + lsd = LINK_2500_SLAVE; + else + lsd = LINK_2500_MASTER; + break; + case SPEED_1000: + if (mss == MASTER_SLAVE_STATE_SLAVE) + lsd = LINK_1000_SLAVE; + else + lsd = LINK_1000_MASTER; + break; + case SPEED_100: + if (duplex == DUPLEX_FULL) + lsd = LINK_100FD; + else + lsd = LINK_100HD; + break; + case SPEED_10: + if (duplex == DUPLEX_FULL) + lsd = LINK_10FD; + else + lsd = LINK_10HD; + break; + default: + lsd = -EINVAL; + } + + return lsd; +} + static int lan743x_sgmii_mpll_set(struct lan743x_adapter *adapter, u16 baud) { @@ -1041,26 +1078,7 @@ static int lan743x_sgmii_2_5G_mode_set(struct lan743x_adapter *adapter, VR_MII_BAUD_RATE_1P25GBPS); } -static int lan743x_is_sgmii_2_5G_mode(struct lan743x_adapter *adapter, - bool *status) -{ - int ret; - - ret = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, - VR_MII_GEN2_4_MPLL_CTRL1); - if (ret < 0) - return ret; - - if (ret == VR_MII_MPLL_MULTIPLIER_125 || - ret == VR_MII_MPLL_MULTIPLIER_50) - *status = true; - else - *status = false; - - return 0; -} - -static int lan743x_sgmii_aneg_update(struct lan743x_adapter *adapter) +static int lan743x_serdes_clock_and_aneg_update(struct lan743x_adapter *adapter) { enum lan743x_sgmii_lsd lsd = adapter->sgmii_lsd; int mii_ctrl; @@ -1147,68 +1165,11 @@ static int lan743x_pcs_seq_state(struct lan743x_adapter *adapter, u8 state) return 0; } -static int lan743x_sgmii_config(struct lan743x_adapter *adapter) +static int lan743x_pcs_power_reset(struct lan743x_adapter *adapter) { - struct net_device *netdev = adapter->netdev; - struct phy_device *phydev = netdev->phydev; - enum lan743x_sgmii_lsd lsd = POWER_DOWN; int mii_ctl; - bool status; int ret; - switch (phydev->speed) { - case SPEED_2500: - if (phydev->master_slave_state == MASTER_SLAVE_STATE_MASTER) - lsd = LINK_2500_MASTER; - else - lsd = LINK_2500_SLAVE; - break; - case SPEED_1000: - if 
(phydev->master_slave_state == MASTER_SLAVE_STATE_MASTER) - lsd = LINK_1000_MASTER; - else - lsd = LINK_1000_SLAVE; - break; - case SPEED_100: - if (phydev->duplex) - lsd = LINK_100FD; - else - lsd = LINK_100HD; - break; - case SPEED_10: - if (phydev->duplex) - lsd = LINK_10FD; - else - lsd = LINK_10HD; - break; - default: - netif_err(adapter, drv, adapter->netdev, - "Invalid speed %d\n", phydev->speed); - return -EINVAL; - } - - adapter->sgmii_lsd = lsd; - ret = lan743x_sgmii_aneg_update(adapter); - if (ret < 0) { - netif_err(adapter, drv, adapter->netdev, - "error %d SGMII cfg failed\n", ret); - return ret; - } - - ret = lan743x_is_sgmii_2_5G_mode(adapter, &status); - if (ret < 0) { - netif_err(adapter, drv, adapter->netdev, - "error %d SGMII get mode failed\n", ret); - return ret; - } - - if (status) - netif_dbg(adapter, drv, adapter->netdev, - "SGMII 2.5G mode enable\n"); - else - netif_dbg(adapter, drv, adapter->netdev, - "SGMII 1G mode enable\n"); - /* SGMII/1000/2500BASE-X PCS power down */ mii_ctl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, MII_BMCR); if (mii_ctl < 0) @@ -1229,11 +1190,7 @@ static int lan743x_sgmii_config(struct lan743x_adapter *adapter) if (ret < 0) return ret; - ret = lan743x_pcs_seq_state(adapter, PCS_POWER_STATE_UP); - if (ret < 0) - return ret; - - return 0; + return lan743x_pcs_seq_state(adapter, PCS_POWER_STATE_UP); } static void lan743x_mac_set_address(struct lan743x_adapter *adapter, @@ -1389,103 +1346,11 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter) 50000, 1000000); } -static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter, - u16 local_adv, u16 remote_adv) -{ - struct lan743x_phy *phy = &adapter->phy; - u8 cap; - - if (phy->fc_autoneg) - cap = mii_resolve_flowctrl_fdx(local_adv, remote_adv); - else - cap = phy->fc_request_control; - - lan743x_mac_flow_ctrl_set_enables(adapter, - cap & FLOW_CTRL_TX, - cap & FLOW_CTRL_RX); -} - static int lan743x_phy_init(struct lan743x_adapter *adapter) { return lan743x_phy_reset(adapter); } -static void lan743x_phy_link_status_change(struct net_device *netdev) -{ - struct lan743x_adapter *adapter = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; - u32 data; - - phy_print_status(phydev); - if (phydev->state == PHY_RUNNING) { - int remote_advertisement = 0; - int local_advertisement = 0; - - data = lan743x_csr_read(adapter, MAC_CR); - - /* set duplex mode */ - if (phydev->duplex) - data |= MAC_CR_DPX_; - else - data &= ~MAC_CR_DPX_; - - /* set bus speed */ - switch (phydev->speed) { - case SPEED_10: - data &= ~MAC_CR_CFG_H_; - data &= ~MAC_CR_CFG_L_; - break; - case SPEED_100: - data &= ~MAC_CR_CFG_H_; - data |= MAC_CR_CFG_L_; - break; - case SPEED_1000: - data |= MAC_CR_CFG_H_; - data &= ~MAC_CR_CFG_L_; - break; - case SPEED_2500: - data |= MAC_CR_CFG_H_; - data |= MAC_CR_CFG_L_; - break; - } - lan743x_csr_write(adapter, MAC_CR, data); - - local_advertisement = - linkmode_adv_to_mii_adv_t(phydev->advertising); - remote_advertisement = - linkmode_adv_to_mii_adv_t(phydev->lp_advertising); - - lan743x_phy_update_flowcontrol(adapter, local_advertisement, - remote_advertisement); - lan743x_ptp_update_latency(adapter, phydev->speed); - if (phydev->interface == PHY_INTERFACE_MODE_SGMII || - phydev->interface == PHY_INTERFACE_MODE_1000BASEX || - phydev->interface == PHY_INTERFACE_MODE_2500BASEX) - lan743x_sgmii_config(adapter); - - data = lan743x_csr_read(adapter, MAC_CR); - if (phydev->enable_tx_lpi) - data |= MAC_CR_EEE_EN_; - else - data &= ~MAC_CR_EEE_EN_; - 
lan743x_csr_write(adapter, MAC_CR, data); - } -} - -static void lan743x_phy_close(struct lan743x_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - struct phy_device *phydev = netdev->phydev; - - phy_stop(netdev->phydev); - phy_disconnect(netdev->phydev); - - /* using phydev here as phy_disconnect NULLs netdev->phydev */ - if (phy_is_pseudo_fixed_link(phydev)) - fixed_phy_unregister(phydev); - -} - static void lan743x_phy_interface_select(struct lan743x_adapter *adapter) { u32 id_rev; @@ -1502,65 +1367,9 @@ static void lan743x_phy_interface_select(struct lan743x_adapter *adapter) adapter->phy_interface = PHY_INTERFACE_MODE_MII; else adapter->phy_interface = PHY_INTERFACE_MODE_RGMII; -} - -static int lan743x_phy_open(struct lan743x_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - struct lan743x_phy *phy = &adapter->phy; - struct fixed_phy_status fphy_status = { - .link = 1, - .speed = SPEED_1000, - .duplex = DUPLEX_FULL, - }; - struct phy_device *phydev; - int ret = -EIO; - - /* try devicetree phy, or fixed link */ - phydev = of_phy_get_and_connect(netdev, adapter->pdev->dev.of_node, - lan743x_phy_link_status_change); - - if (!phydev) { - /* try internal phy */ - phydev = phy_find_first(adapter->mdiobus); - if (!phydev) { - if ((adapter->csr.id_rev & ID_REV_ID_MASK_) == - ID_REV_ID_LAN7431_) { - phydev = fixed_phy_register(PHY_POLL, - &fphy_status, NULL); - if (IS_ERR(phydev)) { - netdev_err(netdev, "No PHY/fixed_PHY found\n"); - return PTR_ERR(phydev); - } - } else { - goto return_error; - } - } - - lan743x_phy_interface_select(adapter); - - ret = phy_connect_direct(netdev, phydev, - lan743x_phy_link_status_change, - adapter->phy_interface); - if (ret) - goto return_error; - } - - /* MAC doesn't support 1000T Half */ - phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); - - /* support both flow controls */ - phy_support_asym_pause(phydev); - phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX); - phy->fc_autoneg = phydev->autoneg; - - phy_start(phydev); - phy_start_aneg(phydev); - phy_attached_info(phydev); - return 0; -return_error: - return ret; + netif_dbg(adapter, drv, adapter->netdev, + "selected phy interface: 0x%X\n", adapter->phy_interface); } static void lan743x_rfe_open(struct lan743x_adapter *adapter) @@ -3061,6 +2870,336 @@ return_error: return ret; } +static int lan743x_phylink_sgmii_config(struct lan743x_adapter *adapter) +{ + u32 sgmii_ctl; + int ret; + + ret = lan743x_get_lsd(SPEED_1000, DUPLEX_FULL, + MASTER_SLAVE_STATE_MASTER); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "error %d link-speed-duplex(LSD) invalid\n", ret); + return ret; + } + + adapter->sgmii_lsd = ret; + netif_dbg(adapter, drv, adapter->netdev, + "Link Speed Duplex (lsd) : 0x%X\n", adapter->sgmii_lsd); + + /* LINK_STATUS_SOURCE from the External PHY via SGMII */ + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + sgmii_ctl &= ~SGMII_CTL_LINK_STATUS_SOURCE_; + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + + ret = lan743x_serdes_clock_and_aneg_update(adapter); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "error %d sgmii aneg update failed\n", ret); + return ret; + } + + return lan743x_pcs_power_reset(adapter); +} + +static int lan743x_phylink_1000basex_config(struct lan743x_adapter *adapter) +{ + u32 sgmii_ctl; + int ret; + + ret = lan743x_get_lsd(SPEED_1000, DUPLEX_FULL, + MASTER_SLAVE_STATE_MASTER); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "error %d link-speed-duplex(LSD) invalid\n", 
ret); + return ret; + } + + adapter->sgmii_lsd = ret; + netif_dbg(adapter, drv, adapter->netdev, + "Link Speed Duplex (lsd) : 0x%X\n", adapter->sgmii_lsd); + + /* LINK_STATUS_SOURCE from 1000BASE-X PCS link status */ + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + sgmii_ctl |= SGMII_CTL_LINK_STATUS_SOURCE_; + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + + ret = lan743x_serdes_clock_and_aneg_update(adapter); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "error %d 1000basex aneg update failed\n", ret); + return ret; + } + + return lan743x_pcs_power_reset(adapter); +} + +static int lan743x_phylink_2500basex_config(struct lan743x_adapter *adapter) +{ + u32 sgmii_ctl; + int ret; + + ret = lan743x_get_lsd(SPEED_2500, DUPLEX_FULL, + MASTER_SLAVE_STATE_MASTER); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "error %d link-speed-duplex(LSD) invalid\n", ret); + return ret; + } + + adapter->sgmii_lsd = ret; + netif_dbg(adapter, drv, adapter->netdev, + "Link Speed Duplex (lsd) : 0x%X\n", adapter->sgmii_lsd); + + /* LINK_STATUS_SOURCE from 2500BASE-X PCS link status */ + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + sgmii_ctl |= SGMII_CTL_LINK_STATUS_SOURCE_; + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + + ret = lan743x_serdes_clock_and_aneg_update(adapter); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "error %d 2500basex aneg update failed\n", ret); + return ret; + } + + return lan743x_pcs_power_reset(adapter); +} + +void lan743x_mac_eee_enable(struct lan743x_adapter *adapter, bool enable) +{ + u32 mac_cr; + + mac_cr = lan743x_csr_read(adapter, MAC_CR); + if (enable) + mac_cr |= MAC_CR_EEE_EN_; + else + mac_cr &= ~MAC_CR_EEE_EN_; + lan743x_csr_write(adapter, MAC_CR, mac_cr); +} + +static void lan743x_phylink_mac_config(struct phylink_config *config, + unsigned int link_an_mode, + const struct phylink_link_state *state) +{ + struct net_device *netdev = to_net_dev(config->dev); + struct lan743x_adapter *adapter = netdev_priv(netdev); + int ret; + + switch (state->interface) { + case PHY_INTERFACE_MODE_2500BASEX: + ret = lan743x_phylink_2500basex_config(adapter); + if (ret < 0) + netif_err(adapter, drv, adapter->netdev, + "2500BASEX config failed. Error %d\n", ret); + else + netif_dbg(adapter, drv, adapter->netdev, + "2500BASEX mode selected and configured\n"); + break; + case PHY_INTERFACE_MODE_1000BASEX: + ret = lan743x_phylink_1000basex_config(adapter); + if (ret < 0) + netif_err(adapter, drv, adapter->netdev, + "1000BASEX config failed. Error %d\n", ret); + else + netif_dbg(adapter, drv, adapter->netdev, + "1000BASEX mode selected and configured\n"); + break; + case PHY_INTERFACE_MODE_SGMII: + ret = lan743x_phylink_sgmii_config(adapter); + if (ret < 0) + netif_err(adapter, drv, adapter->netdev, + "SGMII config failed. 
Error %d\n", ret); + else + netif_dbg(adapter, drv, adapter->netdev, + "SGMII mode selected and configured\n"); + break; + default: + netif_dbg(adapter, drv, adapter->netdev, + "RGMII/GMII/MII(0x%X) mode enable\n", + state->interface); + break; + } +} + +static void lan743x_phylink_mac_link_down(struct phylink_config *config, + unsigned int link_an_mode, + phy_interface_t interface) +{ + struct net_device *netdev = to_net_dev(config->dev); + struct lan743x_adapter *adapter = netdev_priv(netdev); + + netif_tx_stop_all_queues(to_net_dev(config->dev)); + lan743x_mac_eee_enable(adapter, false); +} + +static void lan743x_phylink_mac_link_up(struct phylink_config *config, + struct phy_device *phydev, + unsigned int link_an_mode, + phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct net_device *netdev = to_net_dev(config->dev); + struct lan743x_adapter *adapter = netdev_priv(netdev); + int mac_cr; + u8 cap; + + mac_cr = lan743x_csr_read(adapter, MAC_CR); + /* Pre-initialize register bits. + * Resulting value corresponds to SPEED_10 + */ + mac_cr &= ~(MAC_CR_CFG_H_ | MAC_CR_CFG_L_); + if (speed == SPEED_2500) + mac_cr |= MAC_CR_CFG_H_ | MAC_CR_CFG_L_; + else if (speed == SPEED_1000) + mac_cr |= MAC_CR_CFG_H_; + else if (speed == SPEED_100) + mac_cr |= MAC_CR_CFG_L_; + + lan743x_csr_write(adapter, MAC_CR, mac_cr); + + lan743x_ptp_update_latency(adapter, speed); + + /* Flow Control operation */ + cap = 0; + if (tx_pause) + cap |= FLOW_CTRL_TX; + if (rx_pause) + cap |= FLOW_CTRL_RX; + + lan743x_mac_flow_ctrl_set_enables(adapter, + cap & FLOW_CTRL_TX, + cap & FLOW_CTRL_RX); + + if (phydev) + lan743x_mac_eee_enable(adapter, phydev->enable_tx_lpi); + + netif_tx_wake_all_queues(netdev); +} + +static const struct phylink_mac_ops lan743x_phylink_mac_ops = { + .mac_config = lan743x_phylink_mac_config, + .mac_link_down = lan743x_phylink_mac_link_down, + .mac_link_up = lan743x_phylink_mac_link_up, +}; + +static int lan743x_phylink_create(struct lan743x_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct phylink *pl; + + adapter->phylink_config.dev = &netdev->dev; + adapter->phylink_config.type = PHYLINK_NETDEV; + adapter->phylink_config.mac_managed_pm = false; + + adapter->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | + MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD; + + lan743x_phy_interface_select(adapter); + + switch (adapter->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + __set_bit(PHY_INTERFACE_MODE_SGMII, + adapter->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + adapter->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + adapter->phylink_config.supported_interfaces); + adapter->phylink_config.mac_capabilities |= MAC_2500FD; + break; + case PHY_INTERFACE_MODE_GMII: + __set_bit(PHY_INTERFACE_MODE_GMII, + adapter->phylink_config.supported_interfaces); + break; + case PHY_INTERFACE_MODE_MII: + __set_bit(PHY_INTERFACE_MODE_MII, + adapter->phylink_config.supported_interfaces); + break; + default: + phy_interface_set_rgmii(adapter->phylink_config.supported_interfaces); + } + + pl = phylink_create(&adapter->phylink_config, NULL, + adapter->phy_interface, &lan743x_phylink_mac_ops); + + if (IS_ERR(pl)) { + netdev_err(netdev, "Could not create phylink (%pe)\n", pl); + return PTR_ERR(pl); + } + + adapter->phylink = pl; + netdev_dbg(netdev, "lan743x phylink created"); + + return 0; +} + +static bool lan743x_phy_handle_exists(struct device_node *dn) +{ + dn = 
of_parse_phandle(dn, "phy-handle", 0); + of_node_put(dn); + return dn != NULL; +} + +static int lan743x_phylink_connect(struct lan743x_adapter *adapter) +{ + struct device_node *dn = adapter->pdev->dev.of_node; + struct net_device *dev = adapter->netdev; + struct phy_device *phydev; + int ret; + + if (dn) + ret = phylink_of_phy_connect(adapter->phylink, dn, 0); + + if (!dn || (ret && !lan743x_phy_handle_exists(dn))) { + phydev = phy_find_first(adapter->mdiobus); + if (phydev) { + /* attach the mac to the phy */ + ret = phylink_connect_phy(adapter->phylink, phydev); + } else if (((adapter->csr.id_rev & ID_REV_ID_MASK_) == + ID_REV_ID_LAN7431_) || adapter->is_pci11x1x) { + struct phylink_link_state state; + unsigned long caps; + + caps = adapter->phylink_config.mac_capabilities; + if (caps & MAC_2500FD) { + state.speed = SPEED_2500; + state.duplex = DUPLEX_FULL; + } else if (caps & MAC_1000FD) { + state.speed = SPEED_1000; + state.duplex = DUPLEX_FULL; + } else { + state.speed = SPEED_UNKNOWN; + state.duplex = DUPLEX_UNKNOWN; + } + + ret = phylink_set_fixed_link(adapter->phylink, &state); + if (ret) { + netdev_err(dev, "Could not set fixed link\n"); + return ret; + } + } else { + netdev_err(dev, "no PHY found\n"); + return -ENXIO; + } + } + + if (ret) { + netdev_err(dev, "Could not attach PHY (%d)\n", ret); + return ret; + } + + phylink_start(adapter->phylink); + + return 0; +} + +static void lan743x_phylink_disconnect(struct lan743x_adapter *adapter) +{ + phylink_stop(adapter->phylink); + phylink_disconnect_phy(adapter->phylink); +} + static int lan743x_netdev_close(struct net_device *netdev) { struct lan743x_adapter *adapter = netdev_priv(netdev); @@ -3074,7 +3213,7 @@ static int lan743x_netdev_close(struct net_device *netdev) lan743x_ptp_close(adapter); - lan743x_phy_close(adapter); + lan743x_phylink_disconnect(adapter); lan743x_mac_close(adapter); @@ -3097,13 +3236,13 @@ static int lan743x_netdev_open(struct net_device *netdev) if (ret) goto close_intr; - ret = lan743x_phy_open(adapter); + ret = lan743x_phylink_connect(adapter); if (ret) goto close_mac; ret = lan743x_ptp_open(adapter); if (ret) - goto close_phy; + goto close_mac; lan743x_rfe_open(adapter); @@ -3119,6 +3258,9 @@ static int lan743x_netdev_open(struct net_device *netdev) goto close_tx; } + if (netdev->phydev) + phy_support_eee(netdev->phydev); + #ifdef CONFIG_PM if (adapter->netdev->phydev) { struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; @@ -3143,9 +3285,8 @@ close_rx: lan743x_rx_close(&adapter->rx[index]); } lan743x_ptp_close(adapter); - -close_phy: - lan743x_phy_close(adapter); + if (adapter->phylink) + lan743x_phylink_disconnect(adapter); close_mac: lan743x_mac_close(adapter); @@ -3174,11 +3315,14 @@ static netdev_tx_t lan743x_netdev_xmit_frame(struct sk_buff *skb, static int lan743x_netdev_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { + struct lan743x_adapter *adapter = netdev_priv(netdev); + if (!netif_running(netdev)) return -EINVAL; if (cmd == SIOCSHWTSTAMP) return lan743x_ptp_ioctl(netdev, ifr, cmd); - return phy_mii_ioctl(netdev->phydev, ifr, cmd); + + return phylink_mii_ioctl(adapter->phylink, ifr, cmd); } static void lan743x_netdev_set_multicast(struct net_device *netdev) @@ -3283,10 +3427,17 @@ static void lan743x_mdiobus_cleanup(struct lan743x_adapter *adapter) mdiobus_unregister(adapter->mdiobus); } +static void lan743x_destroy_phylink(struct lan743x_adapter *adapter) +{ + phylink_destroy(adapter->phylink); + adapter->phylink = NULL; +} + static void lan743x_full_cleanup(struct 
lan743x_adapter *adapter) { unregister_netdev(adapter->netdev); + lan743x_destroy_phylink(adapter); lan743x_mdiobus_cleanup(adapter); lan743x_hardware_cleanup(adapter); lan743x_pci_cleanup(adapter); @@ -3500,14 +3651,21 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, NETIF_F_HW_CSUM | NETIF_F_RXCSUM; adapter->netdev->hw_features = adapter->netdev->features; - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); + ret = lan743x_phylink_create(adapter); + if (ret < 0) { + netif_err(adapter, probe, netdev, + "failed to setup phylink (%d)\n", ret); + goto cleanup_mdiobus; + } ret = register_netdev(adapter->netdev); if (ret < 0) - goto cleanup_mdiobus; + goto cleanup_phylink; return 0; +cleanup_phylink: + lan743x_destroy_phylink(adapter); + cleanup_mdiobus: lan743x_mdiobus_cleanup(adapter); @@ -3763,6 +3921,7 @@ static int lan743x_pm_resume(struct device *dev) MAC_WK_SRC_WK_FR_SAVED_; lan743x_csr_write(adapter, MAC_WK_SRC, data); + rtnl_lock(); /* open netdev when netdev is at running state while resume. * For instance, it is true when system wakes up after pm-suspend * However, it is false when system wakes up after suspend GUI menu */ lan743x_netdev_open(netdev); netif_device_attach(netdev); + rtnl_unlock(); return 0; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 3b2585a384e2..8ef897c114d3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -5,6 +5,7 @@ #define _LAN743X_H #include <linux/phy.h> +#include <linux/phylink.h> #include "lan743x_ptp.h" #define DRIVER_AUTHOR "Bryan Whitehead <Bryan.Whitehead@microchip.com>" @@ -1083,6 +1084,8 @@ struct lan743x_adapter { u32 flags; u32 hw_cfg; phy_interface_t phy_interface; + struct phylink *phylink; + struct phylink_config phylink_config; }; #define LAN743X_COMPONENT_FLAG_RX(channel) BIT(20 + (channel)) @@ -1203,5 +1206,6 @@ void lan743x_hs_syslock_release(struct lan743x_adapter *adapter); void lan743x_mac_flow_ctrl_set_enables(struct lan743x_adapter *adapter, bool tx_enable, bool rx_enable); int lan743x_sgmii_read(struct lan743x_adapter *adapter, u8 mmd, u16 addr); +void lan743x_mac_eee_enable(struct lan743x_adapter *adapter, bool enable); #endif /* _LAN743X_H */ diff --git a/drivers/net/ethernet/microchip/lan865x/Kconfig b/drivers/net/ethernet/microchip/lan865x/Kconfig new file mode 100644 index 000000000000..7f2a4e7e1915 --- /dev/null +++ b/drivers/net/ethernet/microchip/lan865x/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Microchip LAN865x Driver Support +# + +if NET_VENDOR_MICROCHIP + +config LAN865X + tristate "LAN865x support" + depends on SPI + select OA_TC6 + help + Support for the Microchip LAN8650/1 Rev.B0/B1 MACPHY Ethernet chip. It + uses the OPEN Alliance 10BASE-T1x Serial Interface specification. + + To compile this driver as a module, choose M here. The module will be + called lan865x. 
+ +endif # NET_VENDOR_MICROCHIP diff --git a/drivers/net/ethernet/microchip/lan865x/Makefile b/drivers/net/ethernet/microchip/lan865x/Makefile new file mode 100644 index 000000000000..9f5dd89c1eb8 --- /dev/null +++ b/drivers/net/ethernet/microchip/lan865x/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the Microchip LAN865x Driver +# + +obj-$(CONFIG_LAN865X) += lan865x.o diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c new file mode 100644 index 000000000000..dd436bdff0f8 --- /dev/null +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Microchip's LAN865x 10BASE-T1S MAC-PHY driver + * + * Author: Parthiban Veerasooran <parthiban.veerasooran@microchip.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/phy.h> +#include <linux/oa_tc6.h> + +#define DRV_NAME "lan8650" + +/* MAC Network Control Register */ +#define LAN865X_REG_MAC_NET_CTL 0x00010000 +#define MAC_NET_CTL_TXEN BIT(3) /* Transmit Enable */ +#define MAC_NET_CTL_RXEN BIT(2) /* Receive Enable */ + +/* MAC Network Configuration Reg */ +#define LAN865X_REG_MAC_NET_CFG 0x00010001 +#define MAC_NET_CFG_PROMISCUOUS_MODE BIT(4) +#define MAC_NET_CFG_MULTICAST_MODE BIT(6) +#define MAC_NET_CFG_UNICAST_MODE BIT(7) + +/* MAC Hash Register Bottom */ +#define LAN865X_REG_MAC_L_HASH 0x00010020 +/* MAC Hash Register Top */ +#define LAN865X_REG_MAC_H_HASH 0x00010021 +/* MAC Specific Addr 1 Bottom Reg */ +#define LAN865X_REG_MAC_L_SADDR1 0x00010022 +/* MAC Specific Addr 1 Top Reg */ +#define LAN865X_REG_MAC_H_SADDR1 0x00010023 + +struct lan865x_priv { + struct work_struct multicast_work; + struct net_device *netdev; + struct spi_device *spi; + struct oa_tc6 *tc6; +}; + +static int lan865x_set_hw_macaddr_low_bytes(struct oa_tc6 *tc6, const u8 *mac) +{ + u32 regval; + + regval = (mac[3] << 24) | (mac[2] << 16) | (mac[1] << 8) | mac[0]; + + return oa_tc6_write_register(tc6, LAN865X_REG_MAC_L_SADDR1, regval); +} + +static int lan865x_set_hw_macaddr(struct lan865x_priv *priv, const u8 *mac) +{ + int restore_ret; + u32 regval; + int ret; + + /* Configure MAC address low bytes */ + ret = lan865x_set_hw_macaddr_low_bytes(priv->tc6, mac); + if (ret) + return ret; + + /* Prepare and configure MAC address high bytes */ + regval = (mac[5] << 8) | mac[4]; + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_H_SADDR1, + regval); + if (!ret) + return 0; + + /* Restore the old MAC address low bytes from netdev if the new MAC + * address high bytes setting failed. 
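+ * The two halves of the address are programmed with separate register
+ * writes, so giving up after a failed high-bytes write would leave the
+ * hardware address filter half-updated.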
+ */ + restore_ret = lan865x_set_hw_macaddr_low_bytes(priv->tc6, + priv->netdev->dev_addr); + if (restore_ret) + return restore_ret; + + return ret; +} + +static const struct ethtool_ops lan865x_ethtool_ops = { + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, +}; + +static int lan865x_set_mac_address(struct net_device *netdev, void *addr) +{ + struct lan865x_priv *priv = netdev_priv(netdev); + struct sockaddr *address = addr; + int ret; + + ret = eth_prepare_mac_addr_change(netdev, addr); + if (ret < 0) + return ret; + + if (ether_addr_equal(address->sa_data, netdev->dev_addr)) + return 0; + + ret = lan865x_set_hw_macaddr(priv, address->sa_data); + if (ret) + return ret; + + eth_commit_mac_addr_change(netdev, addr); + + return 0; +} + +static u32 get_address_bit(u8 addr[ETH_ALEN], u32 bit) +{ + return ((addr[bit / 8]) >> (bit % 8)) & 1; +} + +static u32 lan865x_hash(u8 addr[ETH_ALEN]) +{ + u32 hash_index = 0; + + for (int i = 0; i < 6; i++) { + u32 hash = 0; + + for (int j = 0; j < 8; j++) + hash ^= get_address_bit(addr, (j * 6) + i); + + hash_index |= (hash << i); + } + + return hash_index; +} + +static int lan865x_set_specific_multicast_addr(struct lan865x_priv *priv) +{ + struct netdev_hw_addr *ha; + u32 hash_lo = 0; + u32 hash_hi = 0; + int ret; + + netdev_for_each_mc_addr(ha, priv->netdev) { + u32 bit_num = lan865x_hash(ha->addr); + + if (bit_num >= BIT(5)) + hash_hi |= (1 << (bit_num - BIT(5))); + else + hash_lo |= (1 << bit_num); + } + + /* Enabling specific multicast addresses */ + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_H_HASH, hash_hi); + if (ret) { + netdev_err(priv->netdev, "Failed to write reg_hashh: %d\n", + ret); + return ret; + } + + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_L_HASH, hash_lo); + if (ret) + netdev_err(priv->netdev, "Failed to write reg_hashl: %d\n", + ret); + + return ret; +} + +static int lan865x_set_all_multicast_addr(struct lan865x_priv *priv) +{ + int ret; + + /* Enabling all multicast addresses */ + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_H_HASH, + 0xffffffff); + if (ret) { + netdev_err(priv->netdev, "Failed to write reg_hashh: %d\n", + ret); + return ret; + } + + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_L_HASH, + 0xffffffff); + if (ret) + netdev_err(priv->netdev, "Failed to write reg_hashl: %d\n", + ret); + + return ret; +} + +static int lan865x_clear_all_multicast_addr(struct lan865x_priv *priv) +{ + int ret; + + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_H_HASH, 0); + if (ret) { + netdev_err(priv->netdev, "Failed to write reg_hashh: %d\n", + ret); + return ret; + } + + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_L_HASH, 0); + if (ret) + netdev_err(priv->netdev, "Failed to write reg_hashl: %d\n", + ret); + + return ret; +} + +static void lan865x_multicast_work_handler(struct work_struct *work) +{ + struct lan865x_priv *priv = container_of(work, struct lan865x_priv, + multicast_work); + u32 regval = 0; + int ret; + + if (priv->netdev->flags & IFF_PROMISC) { + /* Enabling promiscuous mode */ + regval |= MAC_NET_CFG_PROMISCUOUS_MODE; + regval &= (~MAC_NET_CFG_MULTICAST_MODE); + regval &= (~MAC_NET_CFG_UNICAST_MODE); + } else if (priv->netdev->flags & IFF_ALLMULTI) { + /* Enabling all multicast mode */ + if (lan865x_set_all_multicast_addr(priv)) + return; + + regval &= (~MAC_NET_CFG_PROMISCUOUS_MODE); + regval |= MAC_NET_CFG_MULTICAST_MODE; + regval &= (~MAC_NET_CFG_UNICAST_MODE); + } else if 
(!netdev_mc_empty(priv->netdev)) { + /* Enabling specific multicast mode */ + if (lan865x_set_specific_multicast_addr(priv)) + return; + + regval &= (~MAC_NET_CFG_PROMISCUOUS_MODE); + regval |= MAC_NET_CFG_MULTICAST_MODE; + regval &= (~MAC_NET_CFG_UNICAST_MODE); + } else { + /* Enabling local mac address only */ + if (lan865x_clear_all_multicast_addr(priv)) + return; + } + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_NET_CFG, regval); + if (ret) + netdev_err(priv->netdev, "Failed to enable promiscuous/multicast/normal mode: %d\n", + ret); +} + +static void lan865x_set_multicast_list(struct net_device *netdev) +{ + struct lan865x_priv *priv = netdev_priv(netdev); + + schedule_work(&priv->multicast_work); +} + +static netdev_tx_t lan865x_send_packet(struct sk_buff *skb, + struct net_device *netdev) +{ + struct lan865x_priv *priv = netdev_priv(netdev); + + return oa_tc6_start_xmit(priv->tc6, skb); +} + +static int lan865x_hw_disable(struct lan865x_priv *priv) +{ + u32 regval; + + if (oa_tc6_read_register(priv->tc6, LAN865X_REG_MAC_NET_CTL, ®val)) + return -ENODEV; + + regval &= ~(MAC_NET_CTL_TXEN | MAC_NET_CTL_RXEN); + + if (oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_NET_CTL, regval)) + return -ENODEV; + + return 0; +} + +static int lan865x_net_close(struct net_device *netdev) +{ + struct lan865x_priv *priv = netdev_priv(netdev); + int ret; + + netif_stop_queue(netdev); + phy_stop(netdev->phydev); + ret = lan865x_hw_disable(priv); + if (ret) { + netdev_err(netdev, "Failed to disable the hardware: %d\n", ret); + return ret; + } + + return 0; +} + +static int lan865x_hw_enable(struct lan865x_priv *priv) +{ + u32 regval; + + if (oa_tc6_read_register(priv->tc6, LAN865X_REG_MAC_NET_CTL, ®val)) + return -ENODEV; + + regval |= MAC_NET_CTL_TXEN | MAC_NET_CTL_RXEN; + + if (oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_NET_CTL, regval)) + return -ENODEV; + + return 0; +} + +static int lan865x_net_open(struct net_device *netdev) +{ + struct lan865x_priv *priv = netdev_priv(netdev); + int ret; + + ret = lan865x_hw_enable(priv); + if (ret) { + netdev_err(netdev, "Failed to enable hardware: %d\n", ret); + return ret; + } + + phy_start(netdev->phydev); + + return 0; +} + +static const struct net_device_ops lan865x_netdev_ops = { + .ndo_open = lan865x_net_open, + .ndo_stop = lan865x_net_close, + .ndo_start_xmit = lan865x_send_packet, + .ndo_set_rx_mode = lan865x_set_multicast_list, + .ndo_set_mac_address = lan865x_set_mac_address, +}; + +static int lan865x_probe(struct spi_device *spi) +{ + struct net_device *netdev; + struct lan865x_priv *priv; + int ret; + + netdev = alloc_etherdev(sizeof(struct lan865x_priv)); + if (!netdev) + return -ENOMEM; + + priv = netdev_priv(netdev); + priv->netdev = netdev; + priv->spi = spi; + spi_set_drvdata(spi, priv); + INIT_WORK(&priv->multicast_work, lan865x_multicast_work_handler); + + priv->tc6 = oa_tc6_init(spi, netdev); + if (!priv->tc6) { + ret = -ENODEV; + goto free_netdev; + } + + /* As per the point s3 in the below errata, SPI receive Ethernet frame + * transfer may halt when starting the next frame in the same data block + * (chunk) as the end of a previous frame. The RFA field should be + * configured to 01b or 10b for proper operation. In these modes, only + * one receive Ethernet frame will be placed in a single data block. + * When the RFA field is written to 01b, received frames will be forced + * to only start in the first word of the data block payload (SWO=0). 
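+ * (Zero-align receive frame here refers to the ZARFE bit in the OPEN
+ * Alliance CONFIG0 standard register, which the
+ * oa_tc6_zero_align_receive_frame_enable() call below is expected to set.)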
As + * recommended, enable zero align receive frame feature for proper + * operation. + * + * https://ww1.microchip.com/downloads/aemDocuments/documents/AIS/ProductDocuments/Errata/LAN8650-1-Errata-80001075.pdf + */ + ret = oa_tc6_zero_align_receive_frame_enable(priv->tc6); + if (ret) { + dev_err(&spi->dev, "Failed to set ZARFE: %d\n", ret); + goto oa_tc6_exit; + } + + /* Get the MAC address from the SPI device tree node */ + if (device_get_ethdev_address(&spi->dev, netdev)) + eth_hw_addr_random(netdev); + + ret = lan865x_set_hw_macaddr(priv, netdev->dev_addr); + if (ret) { + dev_err(&spi->dev, "Failed to configure MAC: %d\n", ret); + goto oa_tc6_exit; + } + + netdev->if_port = IF_PORT_10BASET; + netdev->irq = spi->irq; + netdev->netdev_ops = &lan865x_netdev_ops; + netdev->ethtool_ops = &lan865x_ethtool_ops; + + ret = register_netdev(netdev); + if (ret) { + dev_err(&spi->dev, "Register netdev failed (ret = %d)", ret); + goto oa_tc6_exit; + } + + return 0; + +oa_tc6_exit: + oa_tc6_exit(priv->tc6); +free_netdev: + free_netdev(priv->netdev); + return ret; +} + +static void lan865x_remove(struct spi_device *spi) +{ + struct lan865x_priv *priv = spi_get_drvdata(spi); + + cancel_work_sync(&priv->multicast_work); + unregister_netdev(priv->netdev); + oa_tc6_exit(priv->tc6); + free_netdev(priv->netdev); +} + +static const struct spi_device_id spidev_spi_ids[] = { + { .name = "lan8650" }, + {}, +}; + +static const struct of_device_id lan865x_dt_ids[] = { + { .compatible = "microchip,lan8650" }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, lan865x_dt_ids); + +static struct spi_driver lan865x_driver = { + .driver = { + .name = DRV_NAME, + .of_match_table = lan865x_dt_ids, + }, + .probe = lan865x_probe, + .remove = lan865x_remove, + .id_table = spidev_spi_ids, +}; +module_spi_driver(lan865x_driver); + +MODULE_DESCRIPTION(DRV_NAME " 10Base-T1S MACPHY Ethernet Driver"); +MODULE_AUTHOR("Parthiban Veerasooran <parthiban.veerasooran@microchip.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig index f9ebffc04eb8..f663b6e12466 100644 --- a/drivers/net/ethernet/microchip/lan966x/Kconfig +++ b/drivers/net/ethernet/microchip/lan966x/Kconfig @@ -8,6 +8,7 @@ config LAN966X_SWITCH select PHYLINK select PAGE_POOL select VCAP + select FDMA help This driver supports the Lan966x network switch device. diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 3b6ac331691d..4cdbe263502c 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -20,3 +20,4 @@ lan966x-switch-$(CONFIG_DEBUG_FS) += lan966x_vcap_debugfs.o # Provide include files ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/vcap +ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/fdma diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index aec7066d83b3..2474dfd330f4 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -549,16 +549,13 @@ static int lan966x_get_ts_info(struct net_device *dev, phc = &lan966x->phc[LAN966X_PHC_PORT]; - info->phc_index = phc->clock ? 
ptp_clock_index(phc->clock) : -1; - if (info->phc_index == -1) { - info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + if (phc->clock) { + info->phc_index = ptp_clock_index(phc->clock); + } else { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 3960534ac2ad..502670718104 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -6,31 +6,55 @@ #include "lan966x_main.h" -static int lan966x_fdma_channel_active(struct lan966x *lan966x) -{ - return lan_rd(lan966x, FDMA_CH_ACTIVE); -} - -static struct page *lan966x_fdma_rx_alloc_page(struct lan966x_rx *rx, - struct lan966x_db *db) +static int lan966x_fdma_rx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) { + struct lan966x *lan966x = (struct lan966x *)fdma->priv; + struct lan966x_rx *rx = &lan966x->rx; struct page *page; page = page_pool_dev_alloc_pages(rx->page_pool); if (unlikely(!page)) - return NULL; + return -ENOMEM; + + rx->page[dcb][db] = page; + *dataptr = page_pool_get_dma_addr(page) + XDP_PACKET_HEADROOM; + + return 0; +} - db->dataptr = page_pool_get_dma_addr(page) + XDP_PACKET_HEADROOM; +static int lan966x_fdma_tx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) +{ + struct lan966x *lan966x = (struct lan966x *)fdma->priv; + + *dataptr = lan966x->tx.dcbs_buf[dcb].dma_addr; - return page; + return 0; +} + +static int lan966x_fdma_xdp_tx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) +{ + struct lan966x *lan966x = (struct lan966x *)fdma->priv; + + *dataptr = lan966x->tx.dcbs_buf[dcb].dma_addr + XDP_PACKET_HEADROOM; + + return 0; +} + +static int lan966x_fdma_channel_active(struct lan966x *lan966x) +{ + return lan_rd(lan966x, FDMA_CH_ACTIVE); } static void lan966x_fdma_rx_free_pages(struct lan966x_rx *rx) { + struct fdma *fdma = &rx->fdma; int i, j; - for (i = 0; i < FDMA_DCB_MAX; ++i) { - for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) + for (i = 0; i < fdma->n_dcbs; ++i) { + for (j = 0; j < fdma->n_dbs; ++j) page_pool_put_full_page(rx->page_pool, rx->page[i][j], false); } @@ -38,41 +62,23 @@ static void lan966x_fdma_rx_free_pages(struct lan966x_rx *rx) static void lan966x_fdma_rx_free_page(struct lan966x_rx *rx) { + struct fdma *fdma = &rx->fdma; struct page *page; - page = rx->page[rx->dcb_index][rx->db_index]; + page = rx->page[fdma->dcb_index][fdma->db_index]; if (unlikely(!page)) return; page_pool_recycle_direct(rx->page_pool, page); } -static void lan966x_fdma_rx_add_dcb(struct lan966x_rx *rx, - struct lan966x_rx_dcb *dcb, - u64 nextptr) -{ - struct lan966x_db *db; - int i; - - for (i = 0; i < FDMA_RX_DCB_MAX_DBS; ++i) { - db = &dcb->db[i]; - db->status = FDMA_DCB_STATUS_INTR; - } - - dcb->nextptr = FDMA_DCB_INVALID_DATA; - dcb->info = FDMA_DCB_INFO_DATAL(PAGE_SIZE << rx->page_order); - - rx->last_entry->nextptr = nextptr; - rx->last_entry = dcb; -} - static int lan966x_fdma_rx_alloc_page_pool(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; struct page_pool_params pp_params = { .order = rx->page_order, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, - .pool_size = FDMA_DCB_MAX, + 
.pool_size = rx->fdma.n_dcbs, .nid = NUMA_NO_NODE, .dev = lan966x->dev, .dma_dir = DMA_FROM_DEVICE, @@ -104,84 +110,41 @@ static int lan966x_fdma_rx_alloc_page_pool(struct lan966x_rx *rx) static int lan966x_fdma_rx_alloc(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; - struct lan966x_rx_dcb *dcb; - struct lan966x_db *db; - struct page *page; - int i, j; - int size; + struct fdma *fdma = &rx->fdma; + int err; if (lan966x_fdma_rx_alloc_page_pool(rx)) return PTR_ERR(rx->page_pool); - /* calculate how many pages are needed to allocate the dcbs */ - size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - - rx->dcbs = dma_alloc_coherent(lan966x->dev, size, &rx->dma, GFP_KERNEL); - if (!rx->dcbs) - return -ENOMEM; - - rx->last_entry = rx->dcbs; - rx->db_index = 0; - rx->dcb_index = 0; - - /* Now for each dcb allocate the dbs */ - for (i = 0; i < FDMA_DCB_MAX; ++i) { - dcb = &rx->dcbs[i]; - dcb->info = 0; - - /* For each db allocate a page and map it to the DB dataptr. */ - for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) { - db = &dcb->db[j]; - page = lan966x_fdma_rx_alloc_page(rx, db); - if (!page) - return -ENOMEM; - - db->status = 0; - rx->page[i][j] = page; - } + err = fdma_alloc_coherent(lan966x->dev, fdma); + if (err) + return err; - lan966x_fdma_rx_add_dcb(rx, dcb, rx->dma + sizeof(*dcb) * i); - } + fdma_dcbs_init(fdma, FDMA_DCB_INFO_DATAL(fdma->db_size), + FDMA_DCB_STATUS_INTR); return 0; } -static void lan966x_fdma_rx_advance_dcb(struct lan966x_rx *rx) -{ - rx->dcb_index++; - rx->dcb_index &= FDMA_DCB_MAX - 1; -} - -static void lan966x_fdma_rx_free(struct lan966x_rx *rx) -{ - struct lan966x *lan966x = rx->lan966x; - u32 size; - - /* Now it is possible to do the cleanup of dcb */ - size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - dma_free_coherent(lan966x->dev, size, rx->dcbs, rx->dma); -} - static void lan966x_fdma_rx_start(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; + struct fdma *fdma = &rx->fdma; u32 mask; /* When activating a channel, first is required to write the first DCB * address and then to activate it */ - lan_wr(lower_32_bits((u64)rx->dma), lan966x, - FDMA_DCB_LLP(rx->channel_id)); - lan_wr(upper_32_bits((u64)rx->dma), lan966x, - FDMA_DCB_LLP1(rx->channel_id)); + lan_wr(lower_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP(fdma->channel_id)); + lan_wr(upper_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP1(fdma->channel_id)); - lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_RX_DCB_MAX_DBS) | + lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(fdma->n_dbs) | FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | FDMA_CH_CFG_CH_INJ_PORT_SET(0) | FDMA_CH_CFG_CH_MEM_SET(1), - lan966x, FDMA_CH_CFG(rx->channel_id)); + lan966x, FDMA_CH_CFG(fdma->channel_id)); /* Start fdma */ lan_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(0), @@ -191,13 +154,13 @@ static void lan966x_fdma_rx_start(struct lan966x_rx *rx) /* Enable interrupts */ mask = lan_rd(lan966x, FDMA_INTR_DB_ENA); mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask); - mask |= BIT(rx->channel_id); + mask |= BIT(fdma->channel_id); lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask), FDMA_INTR_DB_ENA_INTR_DB_ENA, lan966x, FDMA_INTR_DB_ENA); /* Activate the channel */ - lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(fdma->channel_id)), FDMA_CH_ACTIVATE_CH_ACTIVATE, lan966x, FDMA_CH_ACTIVATE); } @@ -205,18 +168,19 @@ static void lan966x_fdma_rx_start(struct lan966x_rx *rx) static void lan966x_fdma_rx_disable(struct 
lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; + struct fdma *fdma = &rx->fdma; u32 val; /* Disable the channel */ - lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(fdma->channel_id)), FDMA_CH_DISABLE_CH_DISABLE, lan966x, FDMA_CH_DISABLE); readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x, - val, !(val & BIT(rx->channel_id)), + val, !(val & BIT(fdma->channel_id)), READL_SLEEP_US, READL_TIMEOUT_US); - lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(fdma->channel_id)), FDMA_CH_DB_DISCARD_DB_DISCARD, lan966x, FDMA_CH_DB_DISCARD); } @@ -225,50 +189,27 @@ static void lan966x_fdma_rx_reload(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; - lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(rx->fdma.channel_id)), FDMA_CH_RELOAD_CH_RELOAD, lan966x, FDMA_CH_RELOAD); } -static void lan966x_fdma_tx_add_dcb(struct lan966x_tx *tx, - struct lan966x_tx_dcb *dcb) -{ - dcb->nextptr = FDMA_DCB_INVALID_DATA; - dcb->info = 0; -} - static int lan966x_fdma_tx_alloc(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; - struct lan966x_tx_dcb *dcb; - struct lan966x_db *db; - int size; - int i, j; + struct fdma *fdma = &tx->fdma; + int err; - tx->dcbs_buf = kcalloc(FDMA_DCB_MAX, sizeof(struct lan966x_tx_dcb_buf), + tx->dcbs_buf = kcalloc(fdma->n_dcbs, sizeof(struct lan966x_tx_dcb_buf), GFP_KERNEL); if (!tx->dcbs_buf) return -ENOMEM; - /* calculate how many pages are needed to allocate the dcbs */ - size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - tx->dcbs = dma_alloc_coherent(lan966x->dev, size, &tx->dma, GFP_KERNEL); - if (!tx->dcbs) + err = fdma_alloc_coherent(lan966x->dev, fdma); + if (err) goto out; - /* Now for each dcb allocate the db */ - for (i = 0; i < FDMA_DCB_MAX; ++i) { - dcb = &tx->dcbs[i]; - - for (j = 0; j < FDMA_TX_DCB_MAX_DBS; ++j) { - db = &dcb->db[j]; - db->dataptr = 0; - db->status = 0; - } - - lan966x_fdma_tx_add_dcb(tx, dcb); - } + fdma_dcbs_init(fdma, 0, 0); return 0; @@ -280,33 +221,30 @@ out: static void lan966x_fdma_tx_free(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; - int size; kfree(tx->dcbs_buf); - - size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - dma_free_coherent(lan966x->dev, size, tx->dcbs, tx->dma); + fdma_free_coherent(lan966x->dev, &tx->fdma); } static void lan966x_fdma_tx_activate(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; + struct fdma *fdma = &tx->fdma; u32 mask; /* When activating a channel, first is required to write the first DCB * address and then to activate it */ - lan_wr(lower_32_bits((u64)tx->dma), lan966x, - FDMA_DCB_LLP(tx->channel_id)); - lan_wr(upper_32_bits((u64)tx->dma), lan966x, - FDMA_DCB_LLP1(tx->channel_id)); + lan_wr(lower_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP(fdma->channel_id)); + lan_wr(upper_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP1(fdma->channel_id)); - lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_TX_DCB_MAX_DBS) | + lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(fdma->n_dbs) | FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | FDMA_CH_CFG_CH_INJ_PORT_SET(0) | FDMA_CH_CFG_CH_MEM_SET(1), - lan966x, FDMA_CH_CFG(tx->channel_id)); + lan966x, FDMA_CH_CFG(fdma->channel_id)); /* Start fdma */ lan_rmw(FDMA_PORT_CTRL_INJ_STOP_SET(0), @@ -316,13 +254,13 @@ static void lan966x_fdma_tx_activate(struct lan966x_tx *tx) /* Enable 
interrupts */ mask = lan_rd(lan966x, FDMA_INTR_DB_ENA); mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask); - mask |= BIT(tx->channel_id); + mask |= BIT(fdma->channel_id); lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask), FDMA_INTR_DB_ENA_INTR_DB_ENA, lan966x, FDMA_INTR_DB_ENA); /* Activate the channel */ - lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(fdma->channel_id)), FDMA_CH_ACTIVATE_CH_ACTIVATE, lan966x, FDMA_CH_ACTIVATE); } @@ -330,23 +268,23 @@ static void lan966x_fdma_tx_activate(struct lan966x_tx *tx) static void lan966x_fdma_tx_disable(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; + struct fdma *fdma = &tx->fdma; u32 val; /* Disable the channel */ - lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(fdma->channel_id)), FDMA_CH_DISABLE_CH_DISABLE, lan966x, FDMA_CH_DISABLE); readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x, - val, !(val & BIT(tx->channel_id)), + val, !(val & BIT(fdma->channel_id)), READL_SLEEP_US, READL_TIMEOUT_US); - lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(fdma->channel_id)), FDMA_CH_DB_DISCARD_DB_DISCARD, lan966x, FDMA_CH_DB_DISCARD); tx->activated = false; - tx->last_in_use = -1; } static void lan966x_fdma_tx_reload(struct lan966x_tx *tx) @@ -354,7 +292,7 @@ static void lan966x_fdma_tx_reload(struct lan966x_tx *tx) struct lan966x *lan966x = tx->lan966x; /* Write the registers to reload the channel */ - lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(tx->fdma.channel_id)), FDMA_CH_RELOAD_CH_RELOAD, lan966x, FDMA_CH_RELOAD); } @@ -393,23 +331,24 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) struct lan966x_tx *tx = &lan966x->tx; struct lan966x_rx *rx = &lan966x->rx; struct lan966x_tx_dcb_buf *dcb_buf; + struct fdma *fdma = &tx->fdma; struct xdp_frame_bulk bq; - struct lan966x_db *db; unsigned long flags; bool clear = false; + struct fdma_db *db; int i; xdp_frame_bulk_init(&bq); spin_lock_irqsave(&lan966x->tx_lock, flags); - for (i = 0; i < FDMA_DCB_MAX; ++i) { + for (i = 0; i < fdma->n_dcbs; ++i) { dcb_buf = &tx->dcbs_buf[i]; if (!dcb_buf->used) continue; - db = &tx->dcbs[i].db[0]; - if (!(db->status & FDMA_DCB_STATUS_DONE)) + db = fdma_db_get(fdma, i, 0); + if (!fdma_db_is_done(db)) continue; dcb_buf->dev->stats.tx_packets++; @@ -449,27 +388,16 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) spin_unlock_irqrestore(&lan966x->tx_lock, flags); } -static bool lan966x_fdma_rx_more_frames(struct lan966x_rx *rx) -{ - struct lan966x_db *db; - - /* Check if there is any data */ - db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; - if (unlikely(!(db->status & FDMA_DCB_STATUS_DONE))) - return false; - - return true; -} - static int lan966x_fdma_rx_check_frame(struct lan966x_rx *rx, u64 *src_port) { struct lan966x *lan966x = rx->lan966x; + struct fdma *fdma = &rx->fdma; struct lan966x_port *port; - struct lan966x_db *db; + struct fdma_db *db; struct page *page; - db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; - page = rx->page[rx->dcb_index][rx->db_index]; + db = fdma_db_next_get(fdma); + page = rx->page[fdma->dcb_index][fdma->db_index]; if (unlikely(!page)) return FDMA_ERROR; @@ -494,16 +422,17 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx, u64 src_port) { struct lan966x *lan966x = rx->lan966x; - struct lan966x_db *db; + 
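Channel bring-up and tear-down both rely on the same read-modify-write of the FDMA_INTR_DB_ENA mask, now keyed by fdma->channel_id. A stand-alone sketch of just the bit manipulation, with a plain variable standing in for the hardware register:

#include <stdint.h>
#include <stdio.h>

static uint32_t reg;	/* stands in for FDMA_INTR_DB_ENA */

static void db_intr_set(int channel_id, int enable)
{
	uint32_t mask = reg;		/* lan_rd() */

	if (enable)
		mask |= 1u << channel_id;
	else
		mask &= ~(1u << channel_id);
	reg = mask;			/* lan_rmw() */
}

int main(void)
{
	db_intr_set(6, 1);	/* FDMA_XTR_CHANNEL */
	db_intr_set(0, 1);	/* FDMA_INJ_CHANNEL */
	db_intr_set(6, 0);
	printf("FDMA_INTR_DB_ENA = 0x%x\n", reg);	/* 0x1 */
	return 0;
}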
struct fdma *fdma = &rx->fdma; struct sk_buff *skb; + struct fdma_db *db; struct page *page; u64 timestamp; /* Get the received frame and unmap it */ - db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; - page = rx->page[rx->dcb_index][rx->db_index]; + db = fdma_db_next_get(fdma); + page = rx->page[fdma->dcb_index][fdma->db_index]; - skb = build_skb(page_address(page), PAGE_SIZE << rx->page_order); + skb = build_skb(page_address(page), fdma->db_size); if (unlikely(!skb)) goto free_page; @@ -546,21 +475,19 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) { struct lan966x *lan966x = container_of(napi, struct lan966x, napi); struct lan966x_rx *rx = &lan966x->rx; - int dcb_reload = rx->dcb_index; - struct lan966x_rx_dcb *old_dcb; - struct lan966x_db *db; + int old_dcb, dcb_reload, counter = 0; + struct fdma *fdma = &rx->fdma; bool redirect = false; struct sk_buff *skb; - struct page *page; - int counter = 0; u64 src_port; - u64 nextptr; + + dcb_reload = fdma->dcb_index; lan966x_fdma_tx_clear_buf(lan966x, weight); /* Get all received skb */ while (counter < weight) { - if (!lan966x_fdma_rx_more_frames(rx)) + if (!fdma_has_frames(fdma)) break; counter++; @@ -570,22 +497,22 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) break; case FDMA_ERROR: lan966x_fdma_rx_free_page(rx); - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); goto allocate_new; case FDMA_REDIRECT: redirect = true; fallthrough; case FDMA_TX: - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); continue; case FDMA_DROP: lan966x_fdma_rx_free_page(rx); - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); continue; } skb = lan966x_fdma_rx_get_frame(rx, src_port); - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); if (!skb) goto allocate_new; @@ -594,20 +521,14 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) allocate_new: /* Allocate new pages and map them */ - while (dcb_reload != rx->dcb_index) { - db = &rx->dcbs[dcb_reload].db[rx->db_index]; - page = lan966x_fdma_rx_alloc_page(rx, db); - if (unlikely(!page)) - break; - rx->page[dcb_reload][rx->db_index] = page; - - old_dcb = &rx->dcbs[dcb_reload]; + while (dcb_reload != fdma->dcb_index) { + old_dcb = dcb_reload; dcb_reload++; - dcb_reload &= FDMA_DCB_MAX - 1; + dcb_reload &= fdma->n_dcbs - 1; + + fdma_dcb_add(fdma, old_dcb, FDMA_DCB_INFO_DATAL(fdma->db_size), + FDMA_DCB_STATUS_INTR); - nextptr = rx->dma + ((unsigned long)old_dcb - - (unsigned long)rx->dcbs); - lan966x_fdma_rx_add_dcb(rx, old_dcb, nextptr); lan966x_fdma_rx_reload(rx); } @@ -650,56 +571,30 @@ irqreturn_t lan966x_fdma_irq_handler(int irq, void *args) static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx) { struct lan966x_tx_dcb_buf *dcb_buf; + struct fdma *fdma = &tx->fdma; int i; - for (i = 0; i < FDMA_DCB_MAX; ++i) { + for (i = 0; i < fdma->n_dcbs; ++i) { dcb_buf = &tx->dcbs_buf[i]; - if (!dcb_buf->used && i != tx->last_in_use) + if (!dcb_buf->used && + !fdma_is_last(&tx->fdma, &tx->fdma.dcbs[i])) return i; } return -1; } -static void lan966x_fdma_tx_setup_dcb(struct lan966x_tx *tx, - int next_to_use, int len, - dma_addr_t dma_addr) -{ - struct lan966x_tx_dcb *next_dcb; - struct lan966x_db *next_db; - - next_dcb = &tx->dcbs[next_to_use]; - next_dcb->nextptr = FDMA_DCB_INVALID_DATA; - - next_db = &next_dcb->db[0]; - next_db->dataptr = dma_addr; - next_db->status = FDMA_DCB_STATUS_SOF | - FDMA_DCB_STATUS_EOF | - FDMA_DCB_STATUS_INTR | - FDMA_DCB_STATUS_BLOCKO(0) | - FDMA_DCB_STATUS_BLOCKL(len); -} - 
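In the NAPI poll rewrite, the reload loop wraps dcb_reload with a mask rather than a modulo, which is only correct because the ring size is a power of two (FDMA_DCB_MAX qualifies). A quick demonstration of the wraparound:

#include <stdio.h>

int main(void)
{
	int n_dcbs = 64;	/* must be a power of two */
	int idx = 62;
	int i;

	for (i = 0; i < 4; i++) {
		printf("%d ", idx);
		idx = (idx + 1) & (n_dcbs - 1);
	}
	printf("\n");	/* prints: 62 63 0 1 */
	return 0;
}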
-static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use) +static void lan966x_fdma_tx_start(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; - struct lan966x_tx_dcb *dcb; if (likely(lan966x->tx.activated)) { - /* Connect current dcb to the next db */ - dcb = &tx->dcbs[tx->last_in_use]; - dcb->nextptr = tx->dma + (next_to_use * - sizeof(struct lan966x_tx_dcb)); - lan966x_fdma_tx_reload(tx); } else { /* Because it is first time, then just activate */ lan966x->tx.activated = true; lan966x_fdma_tx_activate(tx); } - - /* Move to next dcb because this last in use */ - tx->last_in_use = next_to_use; } int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) @@ -752,11 +647,6 @@ int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) next_dcb_buf->data.xdpf = xdpf; next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES; - - /* Setup next dcb */ - lan966x_fdma_tx_setup_dcb(tx, next_to_use, - xdpf->len + IFH_LEN_BYTES, - dma_addr); } else { page = ptr; @@ -773,11 +663,6 @@ int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) next_dcb_buf->data.page = page; next_dcb_buf->len = len + IFH_LEN_BYTES; - - /* Setup next dcb */ - lan966x_fdma_tx_setup_dcb(tx, next_to_use, - len + IFH_LEN_BYTES, - dma_addr + XDP_PACKET_HEADROOM); } /* Fill up the buffer */ @@ -788,8 +673,19 @@ int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) next_dcb_buf->ptp = false; next_dcb_buf->dev = port->dev; + __fdma_dcb_add(&tx->fdma, + next_to_use, + 0, + FDMA_DCB_STATUS_INTR | + FDMA_DCB_STATUS_SOF | + FDMA_DCB_STATUS_EOF | + FDMA_DCB_STATUS_BLOCKO(0) | + FDMA_DCB_STATUS_BLOCKL(next_dcb_buf->len), + &fdma_nextptr_cb, + &lan966x_fdma_xdp_tx_dataptr_cb); + /* Start the transmission */ - lan966x_fdma_tx_start(tx, next_to_use); + lan966x_fdma_tx_start(tx); out: spin_unlock(&lan966x->tx_lock); @@ -847,9 +743,6 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) goto release; } - /* Setup next dcb */ - lan966x_fdma_tx_setup_dcb(tx, next_to_use, skb->len, dma_addr); - /* Fill up the buffer */ next_dcb_buf = &tx->dcbs_buf[next_to_use]; next_dcb_buf->use_skb = true; @@ -861,12 +754,21 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) next_dcb_buf->ptp = false; next_dcb_buf->dev = dev; + fdma_dcb_add(&tx->fdma, + next_to_use, + 0, + FDMA_DCB_STATUS_INTR | + FDMA_DCB_STATUS_SOF | + FDMA_DCB_STATUS_EOF | + FDMA_DCB_STATUS_BLOCKO(0) | + FDMA_DCB_STATUS_BLOCKL(skb->len)); + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) next_dcb_buf->ptp = true; /* Start the transmission */ - lan966x_fdma_tx_start(tx, next_to_use); + lan966x_fdma_tx_start(tx); return NETDEV_TX_OK; @@ -908,14 +810,11 @@ static int lan966x_qsys_sw_status(struct lan966x *lan966x) static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) { struct page_pool *page_pool; - dma_addr_t rx_dma; - void *rx_dcbs; - u32 size; + struct fdma fdma_rx_old; int err; /* Store these for later to free them */ - rx_dma = lan966x->rx.dma; - rx_dcbs = lan966x->rx.dcbs; + memcpy(&fdma_rx_old, &lan966x->rx.fdma, sizeof(struct fdma)); page_pool = lan966x->rx.page_pool; napi_synchronize(&lan966x->napi); @@ -931,9 +830,7 @@ static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) goto restore; lan966x_fdma_rx_start(&lan966x->rx); - size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - dma_free_coherent(lan966x->dev, size, 
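The TX paths now hand the per-DB status word straight to fdma_dcb_add()/__fdma_dcb_add(). Using the bit layout of the FDMA_DCB_STATUS_* macros deleted from lan966x_main.h later in this series (BLOCKL in bits 15:0, SOF/EOF/INTR at 16/17/18, BLOCKO in 31:20), the composition can be checked in isolation:

#include <stdint.h>
#include <stdio.h>

#define GENMASK(h, l)	(((~0u) << (l)) & (~0u >> (31 - (h))))
#define FDMA_DCB_STATUS_BLOCKL(x)	((x) & GENMASK(15, 0))
#define FDMA_DCB_STATUS_SOF		(1u << 16)
#define FDMA_DCB_STATUS_EOF		(1u << 17)
#define FDMA_DCB_STATUS_INTR		(1u << 18)
#define FDMA_DCB_STATUS_BLOCKO(x)	(((x) << 20) & GENMASK(31, 20))

int main(void)
{
	uint32_t status = FDMA_DCB_STATUS_SOF | FDMA_DCB_STATUS_EOF |
			  FDMA_DCB_STATUS_INTR | FDMA_DCB_STATUS_BLOCKO(0) |
			  FDMA_DCB_STATUS_BLOCKL(1518);

	/* One single-block frame of 1518 bytes, interrupt on completion */
	printf("status 0x%08x, len %u\n", status, status & GENMASK(15, 0));
	return 0;
}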
rx_dcbs, rx_dma); + fdma_free_coherent(lan966x->dev, &fdma_rx_old); page_pool_destroy(page_pool); @@ -943,8 +840,7 @@ static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) return err; restore: lan966x->rx.page_pool = page_pool; - lan966x->rx.dma = rx_dma; - lan966x->rx.dcbs = rx_dcbs; + memcpy(&lan966x->rx.fdma, &fdma_rx_old, sizeof(struct fdma)); lan966x_fdma_rx_start(&lan966x->rx); return err; @@ -1034,11 +930,24 @@ int lan966x_fdma_init(struct lan966x *lan966x) return 0; lan966x->rx.lan966x = lan966x; - lan966x->rx.channel_id = FDMA_XTR_CHANNEL; + lan966x->rx.fdma.channel_id = FDMA_XTR_CHANNEL; + lan966x->rx.fdma.n_dcbs = FDMA_DCB_MAX; + lan966x->rx.fdma.n_dbs = FDMA_RX_DCB_MAX_DBS; + lan966x->rx.fdma.priv = lan966x; + lan966x->rx.fdma.size = fdma_get_size(&lan966x->rx.fdma); + lan966x->rx.fdma.db_size = PAGE_SIZE << lan966x->rx.page_order; + lan966x->rx.fdma.ops.nextptr_cb = &fdma_nextptr_cb; + lan966x->rx.fdma.ops.dataptr_cb = &lan966x_fdma_rx_dataptr_cb; lan966x->rx.max_mtu = lan966x_fdma_get_max_frame(lan966x); lan966x->tx.lan966x = lan966x; - lan966x->tx.channel_id = FDMA_INJ_CHANNEL; - lan966x->tx.last_in_use = -1; + lan966x->tx.fdma.channel_id = FDMA_INJ_CHANNEL; + lan966x->tx.fdma.n_dcbs = FDMA_DCB_MAX; + lan966x->tx.fdma.n_dbs = FDMA_TX_DCB_MAX_DBS; + lan966x->tx.fdma.priv = lan966x; + lan966x->tx.fdma.size = fdma_get_size(&lan966x->tx.fdma); + lan966x->tx.fdma.db_size = PAGE_SIZE << lan966x->rx.page_order; + lan966x->tx.fdma.ops.nextptr_cb = &fdma_nextptr_cb; + lan966x->tx.fdma.ops.dataptr_cb = &lan966x_fdma_tx_dataptr_cb; err = lan966x_fdma_rx_alloc(&lan966x->rx); if (err) @@ -1046,7 +955,7 @@ int lan966x_fdma_init(struct lan966x *lan966x) err = lan966x_fdma_tx_alloc(&lan966x->tx); if (err) { - lan966x_fdma_rx_free(&lan966x->rx); + fdma_free_coherent(lan966x->dev, &lan966x->rx.fdma); return err; } @@ -1067,7 +976,7 @@ void lan966x_fdma_deinit(struct lan966x *lan966x) napi_disable(&lan966x->napi); lan966x_fdma_rx_free_pages(&lan966x->rx); - lan966x_fdma_rx_free(&lan966x->rx); + fdma_free_coherent(lan966x->dev, &lan966x->rx.fdma); page_pool_destroy(lan966x->rx.page_pool); lan966x_fdma_tx_free(&lan966x->tx); } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index ec672af12e25..534d4716d5f7 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -816,7 +816,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_TC; dev->hw_features |= NETIF_F_HW_TC; - dev->priv_flags |= IFF_SEE_ALL_HWTSTAMP_REQUESTS; + dev->see_all_hwtstamp_requests = true; dev->needed_headroom = IFH_LEN_BYTES; eth_hw_addr_gen(dev, lan966x->base_mac, p + 1); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index f8bebbcf77b2..25cb2f61986f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -16,6 +16,7 @@ #include <net/switchdev.h> #include <net/xdp.h> +#include <fdma_api.h> #include <vcap_api.h> #include <vcap_api_client.h> @@ -76,15 +77,6 @@ #define FDMA_RX_DCB_MAX_DBS 1 #define FDMA_TX_DCB_MAX_DBS 1 -#define FDMA_DCB_INFO_DATAL(x) ((x) & GENMASK(15, 0)) - -#define FDMA_DCB_STATUS_BLOCKL(x) ((x) & GENMASK(15, 0)) -#define FDMA_DCB_STATUS_SOF BIT(16) -#define FDMA_DCB_STATUS_EOF BIT(17) -#define FDMA_DCB_STATUS_INTR BIT(18) -#define FDMA_DCB_STATUS_DONE 
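The init code sizes each ring via fdma_get_size(). Presumably this returns the page-aligned size of the DCB array alone, while fdma_get_size_contiguous() (used by the sparx5 TX path further below) also reserves room for the data buffers behind the DCBs; a sketch of that arithmetic under those assumptions:

#include <stdio.h>

#define PAGE_SIZE	4096
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int n_dcbs = 64, n_dbs = 1, dcb_size = 32, db_size = PAGE_SIZE;
	unsigned long dcbs_only = ALIGN(n_dcbs * dcb_size, PAGE_SIZE);
	unsigned long contiguous = ALIGN(n_dcbs * dcb_size +
					 n_dcbs * n_dbs * db_size, PAGE_SIZE);

	printf("dcbs only: %lu, dcbs + buffers: %lu\n", dcbs_only, contiguous);
	return 0;
}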
BIT(19) -#define FDMA_DCB_STATUS_BLOCKO(x) (((x) << 20) & GENMASK(31, 20)) -#define FDMA_DCB_INVALID_DATA 0x1 #define FDMA_XTR_CHANNEL 6 #define FDMA_INJ_CHANNEL 0 @@ -199,49 +191,14 @@ enum vcap_is1_port_sel_rt { struct lan966x_port; -struct lan966x_db { - u64 dataptr; - u64 status; -}; - -struct lan966x_rx_dcb { - u64 nextptr; - u64 info; - struct lan966x_db db[FDMA_RX_DCB_MAX_DBS]; -}; - -struct lan966x_tx_dcb { - u64 nextptr; - u64 info; - struct lan966x_db db[FDMA_TX_DCB_MAX_DBS]; -}; - struct lan966x_rx { struct lan966x *lan966x; - /* Pointer to the array of hardware dcbs. */ - struct lan966x_rx_dcb *dcbs; - - /* Pointer to the last address in the dcbs. */ - struct lan966x_rx_dcb *last_entry; + struct fdma fdma; /* For each DB, there is a page */ struct page *page[FDMA_DCB_MAX][FDMA_RX_DCB_MAX_DBS]; - /* Represents the db_index, it can have a value between 0 and - * FDMA_RX_DCB_MAX_DBS, once it reaches the value of FDMA_RX_DCB_MAX_DBS - * it means that the DCB can be reused. - */ - int db_index; - - /* Represents the index in the dcbs. It has a value between 0 and - * FDMA_DCB_MAX - */ - int dcb_index; - - /* Represents the dma address to the dcbs array */ - dma_addr_t dma; - /* Represents the page order that is used to allocate the pages for the * RX buffers. This value is calculated based on max MTU of the devices. */ @@ -252,8 +209,6 @@ struct lan966x_rx { */ u32 max_mtu; - u8 channel_id; - struct page_pool *page_pool; }; @@ -275,18 +230,11 @@ struct lan966x_tx_dcb_buf { struct lan966x_tx { struct lan966x *lan966x; - /* Pointer to the dcb list */ - struct lan966x_tx_dcb *dcbs; - u16 last_in_use; - - /* Represents the DMA address to the first entry of the dcb entries. */ - dma_addr_t dma; + struct fdma fdma; /* Array of dcbs that are given to the HW */ struct lan966x_tx_dcb_buf *dcbs_buf; - u8 channel_id; - bool activated; }; diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index f58c506bda22..3f04992eace6 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -10,6 +10,7 @@ config SPARX5_SWITCH select PHY_SPARX5_SERDES select RESET_CONTROLLER select VCAP + select FDMA help This driver supports the Sparx5 network switch device. diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile index b68fe9c9a656..288de95add18 100644 --- a/drivers/net/ethernet/microchip/sparx5/Makefile +++ b/drivers/net/ethernet/microchip/sparx5/Makefile @@ -18,3 +18,4 @@ sparx5-switch-$(CONFIG_DEBUG_FS) += sparx5_vcap_debugfs.o # Provide include files ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/vcap +ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/fdma diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 4f800c1a435d..d898a7238b48 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1194,16 +1194,13 @@ static int sparx5_get_ts_info(struct net_device *dev, phc = &sparx5->phc[SPARX5_PHC_PORT]; - info->phc_index = phc->clock ? 
ptp_clock_index(phc->clock) : -1; - if (info->phc_index == -1) { - info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + if (phc->clock) { + info->phc_index = ptp_clock_index(phc->clock); + } else { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 1915998f6079..61df874b7623 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -21,107 +21,51 @@ #define FDMA_XTR_CHANNEL 6 #define FDMA_INJ_CHANNEL 0 -#define FDMA_DCB_INFO_DATAL(x) ((x) & GENMASK(15, 0)) -#define FDMA_DCB_INFO_TOKEN BIT(17) -#define FDMA_DCB_INFO_INTR BIT(18) -#define FDMA_DCB_INFO_SW(x) (((x) << 24) & GENMASK(31, 24)) - -#define FDMA_DCB_STATUS_BLOCKL(x) ((x) & GENMASK(15, 0)) -#define FDMA_DCB_STATUS_SOF BIT(16) -#define FDMA_DCB_STATUS_EOF BIT(17) -#define FDMA_DCB_STATUS_INTR BIT(18) -#define FDMA_DCB_STATUS_DONE BIT(19) -#define FDMA_DCB_STATUS_BLOCKO(x) (((x) << 20) & GENMASK(31, 20)) -#define FDMA_DCB_INVALID_DATA 0x1 - #define FDMA_XTR_BUFFER_SIZE 2048 #define FDMA_WEIGHT 4 -/* Frame DMA DCB format - * - * +---------------------------+ - * | Next Ptr | - * +---------------------------+ - * | Reserved | Info | - * +---------------------------+ - * | Data0 Ptr | - * +---------------------------+ - * | Reserved | Status0 | - * +---------------------------+ - * | Data1 Ptr | - * +---------------------------+ - * | Reserved | Status1 | - * +---------------------------+ - * | Data2 Ptr | - * +---------------------------+ - * | Reserved | Status2 | - * |-------------|-------------| - * | | - * | | - * | | - * | | - * | | - * |---------------------------| - * | Data14 Ptr | - * +-------------|-------------+ - * | Reserved | Status14 | - * +-------------|-------------+ - */ - -/* For each hardware DB there is an entry in this list and when the HW DB - * entry is used, this SW DB entry is moved to the back of the list - */ -struct sparx5_db { - struct list_head list; - void *cpu_addr; -}; - -static void sparx5_fdma_rx_add_dcb(struct sparx5_rx *rx, - struct sparx5_rx_dcb_hw *dcb, - u64 nextptr) +static int sparx5_fdma_tx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) { - int idx = 0; - - /* Reset the status of the DB */ - for (idx = 0; idx < FDMA_RX_DCB_MAX_DBS; ++idx) { - struct sparx5_db_hw *db = &dcb->db[idx]; + *dataptr = fdma->dma + (sizeof(struct fdma_dcb) * fdma->n_dcbs) + + ((dcb * fdma->n_dbs + db) * fdma->db_size); - db->status = FDMA_DCB_STATUS_INTR; - } - dcb->nextptr = FDMA_DCB_INVALID_DATA; - dcb->info = FDMA_DCB_INFO_DATAL(FDMA_XTR_BUFFER_SIZE); - rx->last_entry->nextptr = nextptr; - rx->last_entry = dcb; + return 0; } -static void sparx5_fdma_tx_add_dcb(struct sparx5_tx *tx, - struct sparx5_tx_dcb_hw *dcb, - u64 nextptr) +static int sparx5_fdma_rx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) { - int idx = 0; + struct sparx5 *sparx5 = fdma->priv; + struct sparx5_rx *rx = &sparx5->rx; + struct sk_buff *skb; - /* Reset the status of the DB */ - for (idx = 0; idx < FDMA_TX_DCB_MAX_DBS; ++idx) { - struct sparx5_db_hw *db = &dcb->db[idx]; + skb = __netdev_alloc_skb(rx->ndev, fdma->db_size, 
GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; - db->status = FDMA_DCB_STATUS_DONE; - } - dcb->nextptr = FDMA_DCB_INVALID_DATA; - dcb->info = FDMA_DCB_INFO_DATAL(FDMA_XTR_BUFFER_SIZE); + *dataptr = virt_to_phys(skb->data); + + rx->skb[dcb][db] = skb; + + return 0; } static void sparx5_fdma_rx_activate(struct sparx5 *sparx5, struct sparx5_rx *rx) { + struct fdma *fdma = &rx->fdma; + /* Write the buffer address in the LLP and LLP1 regs */ - spx5_wr(((u64)rx->dma) & GENMASK(31, 0), sparx5, - FDMA_DCB_LLP(rx->channel_id)); - spx5_wr(((u64)rx->dma) >> 32, sparx5, FDMA_DCB_LLP1(rx->channel_id)); + spx5_wr(((u64)fdma->dma) & GENMASK(31, 0), sparx5, + FDMA_DCB_LLP(fdma->channel_id)); + spx5_wr(((u64)fdma->dma) >> 32, sparx5, + FDMA_DCB_LLP1(fdma->channel_id)); /* Set the number of RX DBs to be used, and DB end-of-frame interrupt */ - spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_RX_DCB_MAX_DBS) | + spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(fdma->n_dbs) | FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | FDMA_CH_CFG_CH_INJ_PORT_SET(XTR_QUEUE), - sparx5, FDMA_CH_CFG(rx->channel_id)); + sparx5, FDMA_CH_CFG(fdma->channel_id)); /* Set the RX Watermark to max */ spx5_rmw(FDMA_XTR_CFG_XTR_FIFO_WM_SET(31), FDMA_XTR_CFG_XTR_FIFO_WM, @@ -133,22 +77,24 @@ static void sparx5_fdma_rx_activate(struct sparx5 *sparx5, struct sparx5_rx *rx) sparx5, FDMA_PORT_CTRL(0)); /* Enable RX channel DB interrupt */ - spx5_rmw(BIT(rx->channel_id), - BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA, + spx5_rmw(BIT(fdma->channel_id), + BIT(fdma->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA, sparx5, FDMA_INTR_DB_ENA); /* Activate the RX channel */ - spx5_wr(BIT(rx->channel_id), sparx5, FDMA_CH_ACTIVATE); + spx5_wr(BIT(fdma->channel_id), sparx5, FDMA_CH_ACTIVATE); } static void sparx5_fdma_rx_deactivate(struct sparx5 *sparx5, struct sparx5_rx *rx) { + struct fdma *fdma = &rx->fdma; + /* Deactivate the RX channel */ - spx5_rmw(0, BIT(rx->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE, + spx5_rmw(0, BIT(fdma->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE, sparx5, FDMA_CH_ACTIVATE); /* Disable RX channel DB interrupt */ - spx5_rmw(0, BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA, + spx5_rmw(0, BIT(fdma->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA, sparx5, FDMA_INTR_DB_ENA); /* Stop RX fdma */ @@ -158,75 +104,55 @@ static void sparx5_fdma_rx_deactivate(struct sparx5 *sparx5, struct sparx5_rx *r static void sparx5_fdma_tx_activate(struct sparx5 *sparx5, struct sparx5_tx *tx) { + struct fdma *fdma = &tx->fdma; + /* Write the buffer address in the LLP and LLP1 regs */ - spx5_wr(((u64)tx->dma) & GENMASK(31, 0), sparx5, - FDMA_DCB_LLP(tx->channel_id)); - spx5_wr(((u64)tx->dma) >> 32, sparx5, FDMA_DCB_LLP1(tx->channel_id)); + spx5_wr(((u64)fdma->dma) & GENMASK(31, 0), sparx5, + FDMA_DCB_LLP(fdma->channel_id)); + spx5_wr(((u64)fdma->dma) >> 32, sparx5, + FDMA_DCB_LLP1(fdma->channel_id)); /* Set the number of TX DBs to be used, and DB end-of-frame interrupt */ - spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_TX_DCB_MAX_DBS) | + spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(fdma->n_dbs) | FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | FDMA_CH_CFG_CH_INJ_PORT_SET(INJ_QUEUE), - sparx5, FDMA_CH_CFG(tx->channel_id)); + sparx5, FDMA_CH_CFG(fdma->channel_id)); /* Start TX fdma */ spx5_rmw(FDMA_PORT_CTRL_INJ_STOP_SET(0), FDMA_PORT_CTRL_INJ_STOP, sparx5, FDMA_PORT_CTRL(0)); /* Activate the channel */ - spx5_wr(BIT(tx->channel_id), sparx5, FDMA_CH_ACTIVATE); + spx5_wr(BIT(fdma->channel_id), sparx5, FDMA_CH_ACTIVATE); } static void sparx5_fdma_tx_deactivate(struct sparx5 *sparx5, 
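sparx5's RX buffers are now supplied through the library's dataptr_cb: at ring-init time the library asks the driver, per (dcb, db) pair, for the address to program into the hardware DB, and the driver remembers the backing skb. A user-space model of that contract, with malloc() standing in for __netdev_alloc_skb() and the returned pointer for its DMA address:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define N_DCBS	4
#define N_DBS	1

static void *bufs[N_DCBS][N_DBS];

static int rx_dataptr_cb(int dcb, int db, uint64_t *dataptr)
{
	void *buf = malloc(2048);	/* __netdev_alloc_skb(ndev, db_size) */

	if (!buf)
		return -1;
	bufs[dcb][db] = buf;		/* rx->skb[dcb][db] = skb */
	*dataptr = (uint64_t)(uintptr_t)buf;
	return 0;
}

int main(void)
{
	uint64_t dataptr;
	int i, j;

	for (i = 0; i < N_DCBS; i++)
		for (j = 0; j < N_DBS; j++)
			if (rx_dataptr_cb(i, j, &dataptr))
				return 1;
	printf("ring populated\n");
	for (i = 0; i < N_DCBS; i++)
		for (j = 0; j < N_DBS; j++)
			free(bufs[i][j]);
	return 0;
}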
struct sparx5_tx *tx) { /* Disable the channel */ - spx5_rmw(0, BIT(tx->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE, + spx5_rmw(0, BIT(tx->fdma.channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE, sparx5, FDMA_CH_ACTIVATE); } -static void sparx5_fdma_rx_reload(struct sparx5 *sparx5, struct sparx5_rx *rx) +static void sparx5_fdma_reload(struct sparx5 *sparx5, struct fdma *fdma) { /* Reload the RX channel */ - spx5_wr(BIT(rx->channel_id), sparx5, FDMA_CH_RELOAD); -} - -static void sparx5_fdma_tx_reload(struct sparx5 *sparx5, struct sparx5_tx *tx) -{ - /* Reload the TX channel */ - spx5_wr(BIT(tx->channel_id), sparx5, FDMA_CH_RELOAD); -} - -static struct sk_buff *sparx5_fdma_rx_alloc_skb(struct sparx5_rx *rx) -{ - return __netdev_alloc_skb(rx->ndev, FDMA_XTR_BUFFER_SIZE, - GFP_ATOMIC); + spx5_wr(BIT(fdma->channel_id), sparx5, FDMA_CH_RELOAD); } static bool sparx5_fdma_rx_get_frame(struct sparx5 *sparx5, struct sparx5_rx *rx) { - struct sparx5_db_hw *db_hw; - unsigned int packet_size; + struct fdma *fdma = &rx->fdma; struct sparx5_port *port; - struct sk_buff *new_skb; + struct fdma_db *db_hw; struct frame_info fi; struct sk_buff *skb; - dma_addr_t dma_addr; /* Check if the DCB is done */ - db_hw = &rx->dcb_entries[rx->dcb_index].db[rx->db_index]; - if (unlikely(!(db_hw->status & FDMA_DCB_STATUS_DONE))) - return false; - skb = rx->skb[rx->dcb_index][rx->db_index]; - /* Replace the DB entry with a new SKB */ - new_skb = sparx5_fdma_rx_alloc_skb(rx); - if (unlikely(!new_skb)) + db_hw = fdma_db_next_get(fdma); + if (unlikely(!fdma_db_is_done(db_hw))) return false; - /* Map the new skb data and set the new skb */ - dma_addr = virt_to_phys(new_skb->data); - rx->skb[rx->dcb_index][rx->db_index] = new_skb; - db_hw->dataptr = dma_addr; - packet_size = FDMA_DCB_STATUS_BLOCKL(db_hw->status); - skb_put(skb, packet_size); + skb = rx->skb[fdma->dcb_index][fdma->db_index]; + skb_put(skb, fdma_db_len_get(db_hw)); /* Now do the normal processing of the skb */ sparx5_ifh_parse((u32 *)skb->data, &fi); /* Map to port netdev */ @@ -259,84 +185,62 @@ static int sparx5_fdma_napi_callback(struct napi_struct *napi, int weight) { struct sparx5_rx *rx = container_of(napi, struct sparx5_rx, napi); struct sparx5 *sparx5 = container_of(rx, struct sparx5, rx); + struct fdma *fdma = &rx->fdma; int counter = 0; while (counter < weight && sparx5_fdma_rx_get_frame(sparx5, rx)) { - struct sparx5_rx_dcb_hw *old_dcb; - - rx->db_index++; + fdma_db_advance(fdma); counter++; /* Check if the DCB can be reused */ - if (rx->db_index != FDMA_RX_DCB_MAX_DBS) + if (fdma_dcb_is_reusable(fdma)) continue; - /* As the DCB can be reused, just advance the dcb_index - * pointer and set the nextptr in the DCB - */ - rx->db_index = 0; - old_dcb = &rx->dcb_entries[rx->dcb_index]; - rx->dcb_index++; - rx->dcb_index &= FDMA_DCB_MAX - 1; - sparx5_fdma_rx_add_dcb(rx, old_dcb, - rx->dma + - ((unsigned long)old_dcb - - (unsigned long)rx->dcb_entries)); + fdma_dcb_add(fdma, fdma->dcb_index, + FDMA_DCB_INFO_DATAL(fdma->db_size), + FDMA_DCB_STATUS_INTR); + fdma_db_reset(fdma); + fdma_dcb_advance(fdma); } if (counter < weight) { napi_complete_done(&rx->napi, counter); - spx5_rmw(BIT(rx->channel_id), - BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA, + spx5_rmw(BIT(fdma->channel_id), + BIT(fdma->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA, sparx5, FDMA_INTR_DB_ENA); } if (counter) - sparx5_fdma_rx_reload(sparx5, rx); + sparx5_fdma_reload(sparx5, fdma); return counter; } -static struct sparx5_tx_dcb_hw *sparx5_fdma_next_dcb(struct sparx5_tx *tx, - struct 
sparx5_tx_dcb_hw *dcb) -{ - struct sparx5_tx_dcb_hw *next_dcb; - - next_dcb = dcb; - next_dcb++; - /* Handle wrap-around */ - if ((unsigned long)next_dcb >= - ((unsigned long)tx->first_entry + FDMA_DCB_MAX * sizeof(*dcb))) - next_dcb = tx->first_entry; - return next_dcb; -} - int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb) { - struct sparx5_tx_dcb_hw *next_dcb_hw; struct sparx5_tx *tx = &sparx5->tx; + struct fdma *fdma = &tx->fdma; static bool first_time = true; - struct sparx5_db_hw *db_hw; - struct sparx5_db *db; + void *virt_addr; - next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry); - db_hw = &next_dcb_hw->db[0]; - if (!(db_hw->status & FDMA_DCB_STATUS_DONE)) + fdma_dcb_advance(fdma); + if (!fdma_db_is_done(fdma_db_get(fdma, fdma->dcb_index, 0))) return -EINVAL; - db = list_first_entry(&tx->db_list, struct sparx5_db, list); - list_move_tail(&db->list, &tx->db_list); - next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA; - tx->curr_entry->nextptr = tx->dma + - ((unsigned long)next_dcb_hw - - (unsigned long)tx->first_entry); - tx->curr_entry = next_dcb_hw; - memset(db->cpu_addr, 0, FDMA_XTR_BUFFER_SIZE); - memcpy(db->cpu_addr, ifh, IFH_LEN * 4); - memcpy(db->cpu_addr + IFH_LEN * 4, skb->data, skb->len); - db_hw->status = FDMA_DCB_STATUS_SOF | - FDMA_DCB_STATUS_EOF | - FDMA_DCB_STATUS_BLOCKO(0) | - FDMA_DCB_STATUS_BLOCKL(skb->len + IFH_LEN * 4 + 4); + + /* Get the virtual address of the dataptr for the next DB */ + virt_addr = ((u8 *)fdma->dcbs + + (sizeof(struct fdma_dcb) * fdma->n_dcbs) + + ((fdma->dcb_index * fdma->n_dbs) * fdma->db_size)); + + memcpy(virt_addr, ifh, IFH_LEN * 4); + memcpy(virt_addr + IFH_LEN * 4, skb->data, skb->len); + + fdma_dcb_add(fdma, fdma->dcb_index, 0, + FDMA_DCB_STATUS_SOF | + FDMA_DCB_STATUS_EOF | + FDMA_DCB_STATUS_BLOCKO(0) | + FDMA_DCB_STATUS_BLOCKL(skb->len + IFH_LEN * 4 + 4)); + if (first_time) { sparx5_fdma_tx_activate(sparx5, tx); first_time = false; } else { - sparx5_fdma_tx_reload(sparx5, tx); + sparx5_fdma_reload(sparx5, fdma); } return NETDEV_TX_OK; } @@ -344,43 +248,16 @@ int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb) static int sparx5_fdma_rx_alloc(struct sparx5 *sparx5) { struct sparx5_rx *rx = &sparx5->rx; - struct sparx5_rx_dcb_hw *dcb; - int idx, jdx; - int size; - - size = sizeof(struct sparx5_rx_dcb_hw) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - rx->dcb_entries = devm_kzalloc(sparx5->dev, size, GFP_KERNEL); - if (!rx->dcb_entries) - return -ENOMEM; - rx->dma = virt_to_phys(rx->dcb_entries); - rx->last_entry = rx->dcb_entries; - rx->db_index = 0; - rx->dcb_index = 0; - /* Now for each dcb allocate the db */ - for (idx = 0; idx < FDMA_DCB_MAX; ++idx) { - dcb = &rx->dcb_entries[idx]; - dcb->info = 0; - /* For each db allocate an skb and map skb data pointer to the DB - * dataptr. 
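The new TX path computes the data buffer address directly from the contiguous allocation: buffers sit immediately behind the DCB array, db_size apart, exactly as sparx5_fdma_tx_dataptr_cb() does above. The same arithmetic worked with example numbers (32-byte DCBs, i.e. nextptr + info + one DB):

#include <stdio.h>

int main(void)
{
	unsigned long dma = 0x10000;	/* base of the coherent area */
	int n_dcbs = 64, n_dbs = 1, dcb_size = 32, db_size = 4096;
	int dcb = 2, db = 0;

	unsigned long dataptr = dma + (unsigned long)dcb_size * n_dcbs +
				(unsigned long)(dcb * n_dbs + db) * db_size;

	printf("dataptr 0x%lx\n", dataptr);	/* 0x10000 + 0x800 + 0x2000 = 0x12800 */
	return 0;
}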
In this way when the frame is received the skb->data - * will contain the frame, so no memcpy is needed - */ - for (jdx = 0; jdx < FDMA_RX_DCB_MAX_DBS; ++jdx) { - struct sparx5_db_hw *db_hw = &dcb->db[jdx]; - dma_addr_t dma_addr; - struct sk_buff *skb; - - skb = sparx5_fdma_rx_alloc_skb(rx); - if (!skb) - return -ENOMEM; - - dma_addr = virt_to_phys(skb->data); - db_hw->dataptr = dma_addr; - db_hw->status = 0; - rx->skb[idx][jdx] = skb; - } - sparx5_fdma_rx_add_dcb(rx, dcb, rx->dma + sizeof(*dcb) * idx); - } + struct fdma *fdma = &rx->fdma; + int err; + + err = fdma_alloc_phys(fdma); + if (err) + return err; + + fdma_dcbs_init(fdma, FDMA_DCB_INFO_DATAL(fdma->db_size), + FDMA_DCB_STATUS_INTR); + netif_napi_add_weight(rx->ndev, &rx->napi, sparx5_fdma_napi_callback, FDMA_WEIGHT); napi_enable(&rx->napi); @@ -391,57 +268,33 @@ static int sparx5_fdma_rx_alloc(struct sparx5 *sparx5) static int sparx5_fdma_tx_alloc(struct sparx5 *sparx5) { struct sparx5_tx *tx = &sparx5->tx; - struct sparx5_tx_dcb_hw *dcb; - int idx, jdx; - int size; - - size = sizeof(struct sparx5_tx_dcb_hw) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - tx->curr_entry = devm_kzalloc(sparx5->dev, size, GFP_KERNEL); - if (!tx->curr_entry) - return -ENOMEM; - tx->dma = virt_to_phys(tx->curr_entry); - tx->first_entry = tx->curr_entry; - INIT_LIST_HEAD(&tx->db_list); - /* Now for each dcb allocate the db */ - for (idx = 0; idx < FDMA_DCB_MAX; ++idx) { - dcb = &tx->curr_entry[idx]; - dcb->info = 0; - /* TX databuffers must be 16byte aligned */ - for (jdx = 0; jdx < FDMA_TX_DCB_MAX_DBS; ++jdx) { - struct sparx5_db_hw *db_hw = &dcb->db[jdx]; - struct sparx5_db *db; - dma_addr_t phys; - void *cpu_addr; - - cpu_addr = devm_kzalloc(sparx5->dev, - FDMA_XTR_BUFFER_SIZE, - GFP_KERNEL); - if (!cpu_addr) - return -ENOMEM; - phys = virt_to_phys(cpu_addr); - db_hw->dataptr = phys; - db_hw->status = 0; - db = devm_kzalloc(sparx5->dev, sizeof(*db), GFP_KERNEL); - if (!db) - return -ENOMEM; - db->cpu_addr = cpu_addr; - list_add_tail(&db->list, &tx->db_list); - } - sparx5_fdma_tx_add_dcb(tx, dcb, tx->dma + sizeof(*dcb) * idx); - /* Let the curr_entry to point to the last allocated entry */ - if (idx == FDMA_DCB_MAX - 1) - tx->curr_entry = dcb; - } + struct fdma *fdma = &tx->fdma; + int err; + + err = fdma_alloc_phys(fdma); + if (err) + return err; + + fdma_dcbs_init(fdma, FDMA_DCB_INFO_DATAL(fdma->db_size), + FDMA_DCB_STATUS_DONE); + return 0; } static void sparx5_fdma_rx_init(struct sparx5 *sparx5, struct sparx5_rx *rx, int channel) { + struct fdma *fdma = &rx->fdma; int idx; - rx->channel_id = channel; + fdma->channel_id = channel; + fdma->n_dcbs = FDMA_DCB_MAX; + fdma->n_dbs = FDMA_RX_DCB_MAX_DBS; + fdma->priv = sparx5; + fdma->db_size = ALIGN(FDMA_XTR_BUFFER_SIZE, PAGE_SIZE); + fdma->size = fdma_get_size(&sparx5->rx.fdma); + fdma->ops.dataptr_cb = &sparx5_fdma_rx_dataptr_cb; + fdma->ops.nextptr_cb = &fdma_nextptr_cb; /* Fetch a netdev for SKB and NAPI use, any will do */ for (idx = 0; idx < SPX5_PORTS; ++idx) { struct sparx5_port *port = sparx5->ports[idx]; @@ -456,7 +309,16 @@ static void sparx5_fdma_rx_init(struct sparx5 *sparx5, static void sparx5_fdma_tx_init(struct sparx5 *sparx5, struct sparx5_tx *tx, int channel) { - tx->channel_id = channel; + struct fdma *fdma = &tx->fdma; + + fdma->channel_id = channel; + fdma->n_dcbs = FDMA_DCB_MAX; + fdma->n_dbs = FDMA_TX_DCB_MAX_DBS; + fdma->priv = sparx5; + fdma->db_size = ALIGN(FDMA_XTR_BUFFER_SIZE, PAGE_SIZE); + fdma->size = fdma_get_size_contiguous(&sparx5->tx.fdma); + fdma->ops.dataptr_cb = 
&sparx5_fdma_tx_dataptr_cb; + fdma->ops.nextptr_cb = &fdma_nextptr_cb; } irqreturn_t sparx5_fdma_handler(int irq, void *args) @@ -594,5 +456,7 @@ int sparx5_fdma_stop(struct sparx5 *sparx5) read_poll_timeout(sparx5_fdma_port_ctrl, val, FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY_GET(val) == 0, 500, 10000, 0, sparx5); + fdma_free_phys(&sparx5->rx.fdma); + fdma_free_phys(&sparx5->tx.fdma); return 0; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 1982ae03b4fe..3309060b1e4c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -20,6 +20,8 @@ #include <linux/debugfs.h> #include <net/flow_offload.h> +#include <fdma_api.h> + #include "sparx5_main_regs.h" /* Target chip type */ @@ -100,23 +102,6 @@ enum sparx5_vlan_port_type { struct sparx5; -struct sparx5_db_hw { - u64 dataptr; - u64 status; -}; - -struct sparx5_rx_dcb_hw { - u64 nextptr; - u64 info; - struct sparx5_db_hw db[FDMA_RX_DCB_MAX_DBS]; -}; - -struct sparx5_tx_dcb_hw { - u64 nextptr; - u64 info; - struct sparx5_db_hw db[FDMA_TX_DCB_MAX_DBS]; -}; - /* Frame DMA receive state: * For each DB, there is a SKB, and the skb data pointer is mapped in * the DB. Once a frame is received the skb is given to the upper layers @@ -124,14 +109,10 @@ struct sparx5_tx_dcb_hw { * When the db_index reached FDMA_RX_DCB_MAX_DBS the DB is reused. */ struct sparx5_rx { - struct sparx5_rx_dcb_hw *dcb_entries; - struct sparx5_rx_dcb_hw *last_entry; + struct fdma fdma; struct sk_buff *skb[FDMA_DCB_MAX][FDMA_RX_DCB_MAX_DBS]; - int db_index; - int dcb_index; dma_addr_t dma; struct napi_struct napi; - u32 channel_id; struct net_device *ndev; u64 packets; }; @@ -140,11 +121,7 @@ struct sparx5_rx { * DCBs are chained using the DCBs nextptr field. 
*/ struct sparx5_tx { - struct sparx5_tx_dcb_hw *curr_entry; - struct sparx5_tx_dcb_hw *first_entry; - struct list_head db_list; - dma_addr_t dma; - u32 channel_id; + struct fdma fdma; u64 packets; u64 dropped; }; diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 51d9423b08a6..f2a5a36fdacd 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1442,18 +1442,8 @@ static void vcap_api_encode_rule_test(struct kunit *test) vcap_enable_lookups(&test_vctrl, &test_netdev, 0, 0, rule->cookie, false); - vcap_free_rule(rule); - - /* Check that the rule has been freed: tricky to access since this - * memory should not be accessible anymore - */ - KUNIT_EXPECT_PTR_NE(test, NULL, rule); - ret = list_empty(&rule->keyfields); - KUNIT_EXPECT_EQ(test, true, ret); - ret = list_empty(&rule->actionfields); - KUNIT_EXPECT_EQ(test, true, ret); - - vcap_del_rule(&test_vctrl, &test_netdev, id); + ret = vcap_del_rule(&test_vctrl, &test_netdev, id); + KUNIT_EXPECT_EQ(test, 0, ret); } static void vcap_api_set_rule_counter_test(struct kunit *test) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index ddb8f68d80a2..ca4ed58f1206 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1496,11 +1496,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto release_region; - err = dma_set_max_seg_size(&pdev->dev, UINT_MAX); - if (err) { - dev_err(&pdev->dev, "Failed to set dma device segment size\n"); - goto release_region; - } + dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; gc = vzalloc(sizeof(*gc)); diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index cafded2f9382..a00f915c5188 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -52,9 +52,33 @@ static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, return 0; } +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, + struct hwc_work_request *req) +{ + struct device *dev = hwc_rxq->hwc->dev; + struct gdma_sge *sge; + int err; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; + sge->size = req->buf_len; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); + return err; +} + static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, - const struct gdma_resp_hdr *resp_msg) + struct hwc_work_request *rx_req) { + const struct gdma_resp_hdr *resp_msg = rx_req->buf_va; struct hwc_caller_ctx *ctx; int err; @@ -62,6 +86,7 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, hwc->inflight_msg_res.map)) { dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", resp_msg->response.hwc_msg_id); + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); return; } @@ -75,30 +100,13 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, memcpy(ctx->output_buf, resp_msg, resp_len); out: ctx->error = err; - complete(&ctx->comp_event); -} - -static int 
mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, - struct hwc_work_request *req) -{ - struct device *dev = hwc_rxq->hwc->dev; - struct gdma_sge *sge; - int err; - - sge = &req->sge; - sge->address = (u64)req->buf_sge_addr; - sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; - sge->size = req->buf_len; - memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); - req->wqe_req.sgl = sge; - req->wqe_req.num_sge = 1; - req->wqe_req.client_data_unit = 0; + /* Must post rx wqe before complete(), otherwise the next rx may + * hit no_wqe error. + */ + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); - err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); - if (err) - dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); - return err; + complete(&ctx->comp_event); } static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, @@ -235,14 +243,12 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, return; } - mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req); - /* Do no longer use 'resp', because the buffer is posted to the HW - * in the below mana_hwc_post_rx_wqe(). + /* Can no longer use 'resp', because the buffer is posted to the HW + * in mana_hwc_handle_resp() above. */ resp = NULL; - - mana_hwc_post_rx_wqe(hwc_rxq, rx_req); } static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d2f07e179e86..c47266d1c7c2 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -511,7 +511,7 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, } /* Release pre-allocated RX buffers */ -static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) +void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) { struct device *dev; int i; @@ -599,12 +599,16 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } -static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) +int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) { struct device *dev; struct page *page; @@ -618,7 +622,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) dev = mpc->ac->gdma_dev->gdma_context->dev; - num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE; + num_rxb = num_queues * mpc->rx_queue_size; WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); @@ -678,7 +682,7 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) int err; /* Pre-allocate buffers to prevent failure in mana_attach later */ - err = mana_pre_alloc_rxbufs(mpc, new_mtu); + err = mana_pre_alloc_rxbufs(mpc, new_mtu, mpc->num_queues); if (err) { netdev_err(ndev, "Insufficient memory for new MTU\n"); return err; @@ -1788,7 +1792,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1799,16 +1802,23 @@ static int 
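The mana HWC reorder above moves mana_hwc_post_rx_wqe() ahead of complete(): the waiter may fire the next request the moment it wakes, and that response needs a posted RX buffer to land in. A compressed single-threaded model of the two orderings:

#include <stdbool.h>
#include <stdio.h>

static bool rx_buffer_posted = true;

static void handle_resp(bool repost_first)
{
	rx_buffer_posted = false;		/* HW consumed the buffer */
	if (repost_first)
		rx_buffer_posted = true;	/* mana_hwc_post_rx_wqe() */
	/* complete(): the waiter wakes here and may immediately send the
	 * next request, whose response needs a posted RX buffer.
	 */
	printf("next response %s\n",
	       rx_buffer_posted ? "deliverable" : "hits no_wqe error");
	if (!repost_first)
		rx_buffer_posted = true;	/* reposted too late */
}

int main(void)
{
	handle_resp(false);	/* old order: window with no buffer */
	handle_resp(true);	/* new order: always deliverable */
	return 0;
}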
mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } @@ -1862,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc) for (i = 0; i < apc->num_queues; i++) { napi = &apc->tx_qp[i].tx_cq.napi; - napi_synchronize(napi); - napi_disable(napi); - netif_napi_del(napi); - + if (apc->tx_qp[i].txq.napi_initialized) { + napi_synchronize(napi); + napi_disable(napi); + netif_napi_del(napi); + apc->tx_qp[i].txq.napi_initialized = false; + } mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); @@ -1899,15 +1911,17 @@ static int mana_create_txq(struct mana_port_context *apc, return -ENOMEM; /* The minimum size of the WQE is 32 bytes, hence - * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs + * apc->tx_queue_size represents the maximum number of WQEs * the SQ can store. This value is then used to size other queues * to prevent overflow. + * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED, + * as min val of apc->tx_queue_size is 128 and that would make + * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size + * are always power of two */ - txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; - BUILD_BUG_ON(!MANA_PAGE_ALIGNED(txq_size)); + txq_size = apc->tx_queue_size * 32; - cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; - cq_size = MANA_PAGE_ALIGN(cq_size); + cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE; gc = gd->gdma_context; @@ -1921,6 +1935,7 @@ static int mana_create_txq(struct mana_port_context *apc, txq->ndev = net; txq->net_txq = netdev_get_tx_queue(net, i); txq->vp_offset = apc->tx_vp_offset; + txq->napi_initialized = false; skb_queue_head_init(&txq->pending_skbs); memset(&spec, 0, sizeof(spec)); @@ -1987,6 +2002,7 @@ static int mana_create_txq(struct mana_port_context *apc, netif_napi_add_tx(net, &cq->napi, mana_poll); napi_enable(&cq->napi); + txq->napi_initialized = true; mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); } @@ -1998,7 +2014,7 @@ out: } static void mana_destroy_rxq(struct mana_port_context *apc, - struct mana_rxq *rxq, bool validate_state) + struct mana_rxq *rxq, bool napi_initialized) { struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; @@ -2013,15 +2029,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc, napi = &rxq->rx_cq.napi; - if (validate_state) + if (napi_initialized) { napi_synchronize(napi); - napi_disable(napi); + napi_disable(napi); + netif_napi_del(napi); + } xdp_rxq_info_unreg(&rxq->xdp_rxq); - netif_napi_del(napi); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); @@ -2145,10 +2161,11 @@ static int mana_push_wqe(struct mana_rxq *rxq) static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) { + struct 
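The doorbell threshold in mana_cq_handler() is queue_size / COMP_ENTRY_SIZE * 4, i.e. four full wraparounds' worth of completions, safely under the hardware's eight-wraparound requirement. Worked with example numbers (COMP_ENTRY_SIZE is 64 bytes; the CQ byte size here is just an example value):

#include <stdio.h>

int main(void)
{
	unsigned int queue_size = 65536;	/* example CQ size in bytes */
	unsigned int comp_entry_size = 64;	/* COMP_ENTRY_SIZE */
	unsigned int entries = queue_size / comp_entry_size;

	printf("ring unarmed doorbell after %u completions (%u per wrap)\n",
	       entries * 4, entries);	/* 4096 completions, 1024 per wrap */
	return 0;
}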
mana_port_context *mpc = netdev_priv(rxq->ndev); struct page_pool_params pprm = {}; int ret; - pprm.pool_size = RX_BUFFERS_PER_QUEUE; + pprm.pool_size = mpc->rx_queue_size; pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; @@ -2180,13 +2197,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, gc = gd->gdma_context; - rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE), + rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size), GFP_KERNEL); if (!rxq) return NULL; rxq->ndev = ndev; - rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->num_rx_buf = apc->rx_queue_size; rxq->rxq_idx = rxq_idx; rxq->rxobj = INVALID_MANA_HANDLE; @@ -2734,6 +2751,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, apc->ndev = ndev; apc->max_queues = gc->max_num_queues; apc->num_queues = gc->max_num_queues; + apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE; + apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE; apc->port_handle = INVALID_MANA_HANDLE; apc->pf_filter_handle = INVALID_MANA_HANDLE; apc->port_idx = port_idx; diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index 146d5db1792f..dc3864377538 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -345,27 +345,101 @@ static int mana_set_channels(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int new_count = channels->combined_count; unsigned int old_count = apc->num_queues; - int err, err2; + int err; + + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, new_count); + if (err) { + netdev_err(ndev, "Insufficient memory for new allocations"); + return err; + } err = mana_detach(ndev, false); if (err) { netdev_err(ndev, "mana_detach failed: %d\n", err); - return err; + goto out; } apc->num_queues = new_count; err = mana_attach(ndev); - if (!err) - return 0; + if (err) { + apc->num_queues = old_count; + netdev_err(ndev, "mana_attach failed: %d\n", err); + } + +out: + mana_pre_dealloc_rxbufs(apc); + return err; +} + +static void mana_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + ring->rx_pending = apc->rx_queue_size; + ring->tx_pending = apc->tx_queue_size; + ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE; + ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE; +} + +static int mana_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 new_tx, new_rx; + u32 old_tx, old_rx; + int err; - netdev_err(ndev, "mana_attach failed: %d\n", err); + old_tx = apc->tx_queue_size; + old_rx = apc->rx_queue_size; - /* Try to roll it back to the old configuration. 
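mana_set_channels() above and mana_set_ringparam() below share one reconfiguration shape: pre-allocate for the new geometry, detach, apply, re-attach, and roll the values back if attach fails, freeing the pre-allocations on every path. A compressed control-flow model with stand-in functions:

#include <stdio.h>

static int pre_alloc(int queues) { (void)queues; return 0; }
static int detach(void) { return 0; }
static int attach(int queues) { return queues <= 64 ? 0 : -1; }
static void pre_dealloc(void) { }

static int set_channels(int *cur, int new_count)
{
	int old_count = *cur;
	int err;

	err = pre_alloc(new_count);	/* mana_pre_alloc_rxbufs() */
	if (err)
		return err;
	err = detach();			/* mana_detach() */
	if (err)
		goto out;
	*cur = new_count;
	err = attach(new_count);	/* mana_attach() */
	if (err)
		*cur = old_count;	/* roll back the geometry */
out:
	pre_dealloc();			/* mana_pre_dealloc_rxbufs() */
	return err;
}

int main(void)
{
	int queues = 8;

	printf("grow: %d (queues=%d)\n", set_channels(&queues, 16), queues);
	printf("fail: %d (queues=%d)\n", set_channels(&queues, 128), queues);
	return 0;
}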
*/ - apc->num_queues = old_count; - err2 = mana_attach(ndev); - if (err2) - netdev_err(ndev, "mana re-attach failed: %d\n", err2); + if (ring->tx_pending < MIN_TX_BUFFERS_PER_QUEUE) { + NL_SET_ERR_MSG_FMT(extack, "tx:%d less than the min:%d", ring->tx_pending, + MIN_TX_BUFFERS_PER_QUEUE); + return -EINVAL; + } + + if (ring->rx_pending < MIN_RX_BUFFERS_PER_QUEUE) { + NL_SET_ERR_MSG_FMT(extack, "rx:%d less than the min:%d", ring->rx_pending, + MIN_RX_BUFFERS_PER_QUEUE); + return -EINVAL; + } + + new_rx = roundup_pow_of_two(ring->rx_pending); + new_tx = roundup_pow_of_two(ring->tx_pending); + netdev_info(ndev, "Using nearest power of 2 values for Txq:%d Rxq:%d\n", + new_tx, new_rx); + + /* pre-allocating new buffers to prevent failures in mana_attach() later */ + apc->rx_queue_size = new_rx; + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); + apc->rx_queue_size = old_rx; + if (err) { + netdev_err(ndev, "Insufficient memory for new allocations\n"); + return err; + } + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + goto out; + } + + apc->tx_queue_size = new_tx; + apc->rx_queue_size = new_rx; + + err = mana_attach(ndev); + if (err) { + netdev_err(ndev, "mana_attach failed: %d\n", err); + apc->tx_queue_size = old_tx; + apc->rx_queue_size = old_rx; + } +out: + mana_pre_dealloc_rxbufs(apc); return err; } @@ -380,4 +454,6 @@ const struct ethtool_ops mana_ethtool_ops = { .set_rxfh = mana_set_rxfh, .get_channels = mana_get_channels, .set_channels = mana_set_channels, + .get_ringparam = mana_get_ringparam, + .set_ringparam = mana_set_ringparam, }; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ed2fb44500b0..3d72aa7b1305 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -453,9 +453,158 @@ static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, return VLAN_N_VID - bridge_num - 1; } +/** + * ocelot_update_vlan_reclassify_rule() - Make switch aware only to bridge VLAN TPID + * + * @ocelot: Switch private data structure + * @port: Index of ingress port + * + * IEEE 802.1Q-2018 clauses "5.5 C-VLAN component conformance" and "5.6 S-VLAN + * component conformance" suggest that a C-VLAN component should only recognize + * and filter on C-Tags, and an S-VLAN component should only recognize and + * process based on S-Tags. + * + * In Linux, as per commit 1a0b20b25732 ("Merge branch 'bridge-next'"), C-VLAN + * components are largely represented by a bridge with vlan_protocol 802.1Q, + * and S-VLAN components by a bridge with vlan_protocol 802.1ad. + * + * Currently the driver only offloads vlan_protocol 802.1Q, but the hardware + * design is non-conformant, because the switch assigns each frame to a VLAN + * based on an entirely different question, as detailed in figure "Basic VLAN + * Classification Flow" from its manual and reproduced below. 
+ * + * Set TAG_TYPE, PCP, DEI, VID to port-default values in VLAN_CFG register + * if VLAN_AWARE_ENA[port] and frame has outer tag then: + * if VLAN_INNER_TAG_ENA[port] and frame has inner tag then: + * TAG_TYPE = (Frame.InnerTPID <> 0x8100) + * Set PCP, DEI, VID to values from inner VLAN header + * else: + * TAG_TYPE = (Frame.OuterTPID <> 0x8100) + * Set PCP, DEI, VID to values from outer VLAN header + * if VID == 0 then: + * VID = VLAN_CFG.VLAN_VID + * + * Summarized, the switch will recognize both 802.1Q and 802.1ad TPIDs as VLAN + * "with equal rights", and just set the TAG_TYPE bit to 0 (if 802.1Q) or to 1 + * (if 802.1ad). It will classify based on whichever of the tags is "outer", no + * matter what TPID that may have (or "inner", if VLAN_INNER_TAG_ENA[port]). + * + * In the VLAN Table, the TAG_TYPE information is not accessible - just the + * classified VID is - so it is as if each VLAN Table entry is for 2 VLANs: + * C-VLAN X, and S-VLAN X. + * + * Whereas the Linux bridge behavior is to only filter on frames with a TPID + * equal to the vlan_protocol, and treat everything else as VLAN-untagged. + * + * Consider an ingress packet tagged with 802.1ad VID=3 and 802.1Q VID=5, + * received on a bridge vlan_filtering=1 vlan_protocol=802.1Q port. This frame + * should be treated as 802.1Q-untagged, and classified to the PVID of that + * bridge port. Not to VID=3, and not to VID=5. + * + * The VCAP IS1 TCAM has everything we need to overwrite the choices made in + * the basic VLAN classification pipeline: it can match on TAG_TYPE in the key, + * and it can modify the classified VID in the action. Thus, for each port + * under a vlan_filtering bridge, we can insert a rule in VCAP IS1 lookup 0 to + * match on 802.1ad tagged frames and modify their classified VID to the 802.1Q + * PVID of the port. This effectively makes it appear to the outside world as + * if those packets were processed as VLAN-untagged. + * + * The rule needs to be updated each time the bridge PVID changes, and needs + * to be deleted if the bridge PVID is deleted, or if the port becomes + * VLAN-unaware. + */ +static int ocelot_update_vlan_reclassify_rule(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port); + struct ocelot_vcap_block *block_vcap_is1 = &ocelot->block[VCAP_IS1]; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + const struct ocelot_bridge_vlan *pvid_vlan; + struct ocelot_vcap_filter *filter; + int err, val, pcp, dei; + bool vid_replace_ena; + u16 vid; + + pvid_vlan = ocelot_port->pvid_vlan; + vid_replace_ena = ocelot_port->vlan_aware && pvid_vlan; + + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, cookie, + false); + if (!vid_replace_ena) { + /* If the reclassification filter doesn't need to exist, delete + * it if it was previously installed, and exit doing nothing + * otherwise. + */ + if (filter) + return ocelot_vcap_filter_del(ocelot, filter); + + return 0; + } + + /* The reclassification rule must apply. See if it already exists + * or if it must be created. + */ + + /* Treating as VLAN-untagged means using as classified VID equal to + * the bridge PVID, and PCP/DEI set to the port default QoS values. 
+ */ + vid = pvid_vlan->vid; + val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + pcp = ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); + dei = !!(val & ANA_PORT_QOS_CFG_DP_DEFAULT_VAL); + + if (filter) { + bool changed = false; + + /* Filter exists, just update it */ + if (filter->action.vid != vid) { + filter->action.vid = vid; + changed = true; + } + if (filter->action.pcp != pcp) { + filter->action.pcp = pcp; + changed = true; + } + if (filter->action.dei != dei) { + filter->action.dei = dei; + changed = true; + } + + if (!changed) + return 0; + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter doesn't exist, create it */ + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->ingress_port_mask = BIT(port); + filter->vlan.tpid = OCELOT_VCAP_BIT_1; + filter->prio = 1; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS1; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->lookup = 0; + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = vid; + filter->action.pcp = pcp; + filter->action.dei = dei; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + /* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - const struct ocelot_bridge_vlan *pvid_vlan) +static int ocelot_port_set_pvid(struct ocelot *ocelot, int port, + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); @@ -475,15 +624,23 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * happens automatically), but also 802.1p traffic which gets * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. + * + * Also, we only support the bridge 802.1Q VLAN protocol, so + * 802.1ad-tagged frames (carrying S-Tags) should be considered + * 802.1Q-untagged, and also dropped. 
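A direct transcription of the "Basic VLAN Classification Flow" pseudocode from the comment above shows why the reclassification rule is needed: the worked example from that comment, an 802.1ad outer tag VID=3 over an 802.1Q inner tag VID=5 (inner-tag classification disabled), classifies to VID 3 rather than the PVID the bridge semantics require.

#include <stdbool.h>
#include <stdio.h>

struct frame { bool tagged; unsigned int tpid, vid; };

static void classify(const struct frame *f, bool vlan_aware,
		     unsigned int pvid, unsigned int *vid, int *tag_type)
{
	*vid = pvid;		/* port-default values from VLAN_CFG */
	*tag_type = 0;

	if (vlan_aware && f->tagged) {
		*tag_type = f->tpid != 0x8100;	/* 1 for 802.1ad */
		*vid = f->vid;
	}
	if (*vid == 0)
		*vid = pvid;
}

int main(void)
{
	/* outer S-Tag VID 3; the inner C-Tag VID 5 plays no role here */
	struct frame f = { true, 0x88a8, 3 };
	unsigned int vid;
	int tag_type;

	classify(&f, true, 1, &vid, &tag_type);
	printf("classified VID %u, TAG_TYPE %d (bridge wants PVID 1)\n",
	       vid, tag_type);
	return 0;
}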
if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA, ANA_PORT_DROP_CFG, port); + + return ocelot_update_vlan_reclassify_rule(ocelot, port); } static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, @@ -631,7 +788,10 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + err = ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + if (err) + return err; + ocelot_port_manage_port_tag(ocelot, port); return 0; @@ -684,9 +844,12 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, - ocelot_bridge_vlan_find(ocelot, vid)); + if (pvid) { + err = ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); + if (err) + return err; + } /* Untagged egress vlan classification */ ocelot_port_manage_port_tag(ocelot, port); @@ -712,8 +875,11 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (del_pvid) - ocelot_port_set_pvid(ocelot, port, NULL); + if (del_pvid) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; + } /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -1099,6 +1265,48 @@ void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, } EXPORT_SYMBOL(ocelot_ptp_rx_timestamp); +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_inj_grp); + +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_inj_grp); + +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->xtr_lock) +{ + spin_lock(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp); + +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->xtr_lock) +{ + spin_unlock(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp); + +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __acquires(&ocelot->xtr_lock) +{ + spin_lock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp_bh); + +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __releases(&ocelot->xtr_lock) +{ + spin_unlock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp_bh); + int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) { u64 timestamp, src_port, len; @@ -1109,6 +1317,8 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) u32 val, *buf; int err; + lockdep_assert_held(&ocelot->xtr_lock); + err = ocelot_xtr_poll_xfh(ocelot, grp, xfh); if (err) return err; @@ -1184,6 +1394,8 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) { u32 val = ocelot_read(ocelot, QS_INJ_STATUS); + lockdep_assert_held(&ocelot->inj_lock); + if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp)))) return false; if (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp))) @@
-1193,28 +1405,55 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) } EXPORT_SYMBOL(ocelot_can_inject); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag) +/** + * ocelot_ifh_set_basic - Set basic information in Injection Frame Header + * @ifh: Pointer to Injection Frame Header memory + * @ocelot: Switch private data structure + * @port: Egress port number + * @rew_op: Egress rewriter operation for PTP + * @skb: Pointer to socket buffer (packet) + * + * Populate the Injection Frame Header with basic information for this skb: the + * analyzer bypass bit, destination port, VLAN info, egress rewriter info. + */ +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct net_device *dev = skb->dev; + u64 vlan_tci, tag_type; + int qos_class; + + ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, + &tag_type); + + qos_class = netdev_get_num_tc(dev) ? + netdev_get_prio_tc_map(dev, skb->priority) : skb->priority; + + memset(ifh, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(ifh, 1); + ocelot_ifh_set_src(ifh, BIT_ULL(ocelot->num_phys_ports)); ocelot_ifh_set_dest(ifh, BIT_ULL(port)); - ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C); - if (vlan_tag) - ocelot_ifh_set_vlan_tci(ifh, vlan_tag); + ocelot_ifh_set_qos_class(ifh, qos_class); + ocelot_ifh_set_tag_type(ifh, tag_type); + ocelot_ifh_set_vlan_tci(ifh, vlan_tci); if (rew_op) ocelot_ifh_set_rew_op(ifh, rew_op); } -EXPORT_SYMBOL(ocelot_ifh_port_set); +EXPORT_SYMBOL(ocelot_ifh_set_basic); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb) { - u32 ifh[OCELOT_TAG_LEN / 4] = {0}; + u32 ifh[OCELOT_TAG_LEN / 4]; unsigned int i, count, last; + lockdep_assert_held(&ocelot->inj_lock); + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); for (i = 0; i < OCELOT_TAG_LEN / 4; i++) ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp); @@ -1247,6 +1486,8 @@ EXPORT_SYMBOL(ocelot_port_inject_frame); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) { + lockdep_assert_held(&ocelot->xtr_lock); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) ocelot_read_rix(ocelot, QS_XTR_RD, grp); } @@ -2532,7 +2773,7 @@ int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) ANA_PORT_QOS_CFG, port); - return 0; + return ocelot_update_vlan_reclassify_rule(ocelot, port); } EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); @@ -2929,6 +3170,8 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->fwd_domain_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); + spin_lock_init(&ocelot->inj_lock); + spin_lock_init(&ocelot->xtr_lock); ocelot->owq = alloc_ordered_workqueue("ocelot-owq", 0); if (!ocelot->owq) diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 312a46832154..00326ae8c708 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -665,8 +665,7 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, ifh = skb_push(skb, OCELOT_TAG_LEN); skb_put(skb, ETH_FCS_LEN); - memset(ifh, 0, OCELOT_TAG_LEN); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); return 0; } diff 
--git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index b3c28260adf8..e172638b0601 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -582,17 +582,13 @@ EXPORT_SYMBOL(ocelot_hwstamp_set); int ocelot_get_ts_info(struct ocelot *ocelot, int port, struct kernel_ethtool_ts_info *info) { - info->phc_index = ocelot->ptp_clock ? - ptp_clock_index(ocelot->ptp_clock) : -1; - if (info->phc_index == -1) { - info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + if (ocelot->ptp_clock) { + info->phc_index = ptp_clock_index(ocelot->ptp_clock); + } else { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 73cdec5ca6a3..5734b86aed5b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -695,6 +695,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TPID, tag->tpid); vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, tag->vid.value, tag->vid.mask); vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 993212c3a7da..c09dd2e3343c 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -51,6 +51,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) struct ocelot *ocelot = arg; int grp = 0, err; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; @@ -69,6 +71,8 @@ out: if (err < 0) ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp(ocelot, grp); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index df2ab5cbd49b..3a02eef58cc6 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -4537,8 +4537,8 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) u64 *prog; int err; - prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), - GFP_KERNEL); + prog = kmemdup_array(nfp_prog->prog, nfp_prog->prog_len, sizeof(u64), + GFP_KERNEL); if (!prog) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 182ba0a8b095..6e0929af0f72 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -821,14 +821,13 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, snprintf(r_vec->name, sizeof(r_vec->name), "%s-rxtx-%d", nfp_net_name(nn), idx); - err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name, - r_vec); + err = request_irq(r_vec->irq_vector, r_vec->handler, IRQF_NO_AUTOEN, + r_vec->name, r_vec); if (err) { nfp_net_napi_del(&nn->dp, r_vec); nn_err(nn, "Error 
requesting IRQ %d\n", r_vec->irq_vector); return err; } - disable_irq(r_vec->irq_vector); irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c index 2dd37557185e..7276e44a21d0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c @@ -41,6 +41,8 @@ struct nfp_dump_tl { ); char data[]; }; +static_assert(offsetof(struct nfp_dump_tl, data) == sizeof(struct nfp_dump_tl_hdr), + "struct member likely outside of struct_group_tagged()"); /* NFP CPP parameters */ struct nfp_dumpspec_cpp_isl_id { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index eee0bfc41074..227e7a5d712e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -248,7 +248,6 @@ nfp_repr_fix_features(struct net_device *netdev, netdev_features_t features) features = netdev_intersect_features(features, lower_features); features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_HW_TC); - features |= NETIF_F_LLTX; return features; } @@ -386,7 +385,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, netif_set_tso_max_segs(netdev, NFP_NET_LSO_MAX_SEGS); netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; - netdev->features |= NETIF_F_LLTX; + netdev->lltx = true; if (nfp_app_has_tc(app)) { netdev->features |= NETIF_F_HW_TC; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 7136bc48530b..df0234a338a8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -278,7 +278,7 @@ struct nfp_nsp *nfp_nsp_open(struct nfp_cpp *cpp) res = nfp_resource_acquire(cpp, NFP_RESOURCE_NSP); if (IS_ERR(res)) - return (void *)res; + return ERR_CAST(res); state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) { diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c new file mode 100644 index 000000000000..f9c0dcd965c2 --- /dev/null +++ b/drivers/net/ethernet/oa_tc6.c @@ -0,0 +1,1361 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * OPEN Alliance 10BASE‑T1x MAC‑PHY Serial Interface framework + * + * Author: Parthiban Veerasooran <parthiban.veerasooran@microchip.com> + */ + +#include <linux/bitfield.h> +#include <linux/iopoll.h> +#include <linux/mdio.h> +#include <linux/phy.h> +#include <linux/oa_tc6.h> + +/* OPEN Alliance TC6 registers */ +/* Standard Capabilities Register */ +#define OA_TC6_REG_STDCAP 0x0002 +#define STDCAP_DIRECT_PHY_REG_ACCESS BIT(8) + +/* Reset Control and Status Register */ +#define OA_TC6_REG_RESET 0x0003 +#define RESET_SWRESET BIT(0) /* Software Reset */ + +/* Configuration Register #0 */ +#define OA_TC6_REG_CONFIG0 0x0004 +#define CONFIG0_SYNC BIT(15) +#define CONFIG0_ZARFE_ENABLE BIT(12) + +/* Status Register #0 */ +#define OA_TC6_REG_STATUS0 0x0008 +#define STATUS0_RESETC BIT(6) /* Reset Complete */ +#define STATUS0_HEADER_ERROR BIT(5) +#define STATUS0_LOSS_OF_FRAME_ERROR BIT(4) +#define STATUS0_RX_BUFFER_OVERFLOW_ERROR BIT(3) +#define STATUS0_TX_PROTOCOL_ERROR BIT(0) + +/* Buffer Status Register */ +#define OA_TC6_REG_BUFFER_STATUS 0x000B +#define BUFFER_STATUS_TX_CREDITS_AVAILABLE GENMASK(15, 8) +#define BUFFER_STATUS_RX_CHUNKS_AVAILABLE GENMASK(7, 0) + +/* Interrupt Mask Register #0 */ +#define 
OA_TC6_REG_INT_MASK0 0x000C +#define INT_MASK0_HEADER_ERR_MASK BIT(5) +#define INT_MASK0_LOSS_OF_FRAME_ERR_MASK BIT(4) +#define INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK BIT(3) +#define INT_MASK0_TX_PROTOCOL_ERR_MASK BIT(0) + +/* PHY Clause 22 registers base address and mask */ +#define OA_TC6_PHY_STD_REG_ADDR_BASE 0xFF00 +#define OA_TC6_PHY_STD_REG_ADDR_MASK 0x1F + +/* Control command header */ +#define OA_TC6_CTRL_HEADER_DATA_NOT_CTRL BIT(31) +#define OA_TC6_CTRL_HEADER_WRITE_NOT_READ BIT(29) +#define OA_TC6_CTRL_HEADER_MEM_MAP_SELECTOR GENMASK(27, 24) +#define OA_TC6_CTRL_HEADER_ADDR GENMASK(23, 8) +#define OA_TC6_CTRL_HEADER_LENGTH GENMASK(7, 1) +#define OA_TC6_CTRL_HEADER_PARITY BIT(0) + +/* Data header */ +#define OA_TC6_DATA_HEADER_DATA_NOT_CTRL BIT(31) +#define OA_TC6_DATA_HEADER_DATA_VALID BIT(21) +#define OA_TC6_DATA_HEADER_START_VALID BIT(20) +#define OA_TC6_DATA_HEADER_START_WORD_OFFSET GENMASK(19, 16) +#define OA_TC6_DATA_HEADER_END_VALID BIT(14) +#define OA_TC6_DATA_HEADER_END_BYTE_OFFSET GENMASK(13, 8) +#define OA_TC6_DATA_HEADER_PARITY BIT(0) + +/* Data footer */ +#define OA_TC6_DATA_FOOTER_EXTENDED_STS BIT(31) +#define OA_TC6_DATA_FOOTER_RXD_HEADER_BAD BIT(30) +#define OA_TC6_DATA_FOOTER_CONFIG_SYNC BIT(29) +#define OA_TC6_DATA_FOOTER_RX_CHUNKS GENMASK(28, 24) +#define OA_TC6_DATA_FOOTER_DATA_VALID BIT(21) +#define OA_TC6_DATA_FOOTER_START_VALID BIT(20) +#define OA_TC6_DATA_FOOTER_START_WORD_OFFSET GENMASK(19, 16) +#define OA_TC6_DATA_FOOTER_END_VALID BIT(14) +#define OA_TC6_DATA_FOOTER_END_BYTE_OFFSET GENMASK(13, 8) +#define OA_TC6_DATA_FOOTER_TX_CREDITS GENMASK(5, 1) + +/* PHY – Clause 45 registers memory map selector (MMS) as per table 6 in the + * OPEN Alliance specification. + */ +#define OA_TC6_PHY_C45_PCS_MMS2 2 /* MMD 3 */ +#define OA_TC6_PHY_C45_PMA_PMD_MMS3 3 /* MMD 1 */ +#define OA_TC6_PHY_C45_VS_PLCA_MMS4 4 /* MMD 31 */ +#define OA_TC6_PHY_C45_AUTO_NEG_MMS5 5 /* MMD 7 */ +#define OA_TC6_PHY_C45_POWER_UNIT_MMS6 6 /* MMD 13 */ + +#define OA_TC6_CTRL_HEADER_SIZE 4 +#define OA_TC6_CTRL_REG_VALUE_SIZE 4 +#define OA_TC6_CTRL_IGNORED_SIZE 4 +#define OA_TC6_CTRL_MAX_REGISTERS 128 +#define OA_TC6_CTRL_SPI_BUF_SIZE (OA_TC6_CTRL_HEADER_SIZE +\ + (OA_TC6_CTRL_MAX_REGISTERS *\ + OA_TC6_CTRL_REG_VALUE_SIZE) +\ + OA_TC6_CTRL_IGNORED_SIZE) +#define OA_TC6_CHUNK_PAYLOAD_SIZE 64 +#define OA_TC6_DATA_HEADER_SIZE 4 +#define OA_TC6_CHUNK_SIZE (OA_TC6_DATA_HEADER_SIZE +\ + OA_TC6_CHUNK_PAYLOAD_SIZE) +#define OA_TC6_MAX_TX_CHUNKS 48 +#define OA_TC6_SPI_DATA_BUF_SIZE (OA_TC6_MAX_TX_CHUNKS *\ + OA_TC6_CHUNK_SIZE) +#define STATUS0_RESETC_POLL_DELAY 1000 +#define STATUS0_RESETC_POLL_TIMEOUT 1000000 + +/* Internal structure for MAC-PHY drivers */ +struct oa_tc6 { + struct device *dev; + struct net_device *netdev; + struct phy_device *phydev; + struct mii_bus *mdiobus; + struct spi_device *spi; + struct mutex spi_ctrl_lock; /* Protects spi control transfer */ + void *spi_ctrl_tx_buf; + void *spi_ctrl_rx_buf; + void *spi_data_tx_buf; + void *spi_data_rx_buf; + struct sk_buff *ongoing_tx_skb; + struct sk_buff *waiting_tx_skb; + struct sk_buff *rx_skb; + struct task_struct *spi_thread; + wait_queue_head_t spi_wq; + u16 tx_skb_offset; + u16 spi_data_tx_buf_offset; + u16 tx_credits; + u8 rx_chunks_available; + bool rx_buf_overflow; + bool int_flag; +}; + +enum oa_tc6_header_type { + OA_TC6_CTRL_HEADER, + OA_TC6_DATA_HEADER, +}; + +enum oa_tc6_register_op { + OA_TC6_CTRL_REG_READ = 0, + OA_TC6_CTRL_REG_WRITE = 1, +}; + +enum oa_tc6_data_valid_info { + OA_TC6_DATA_INVALID, + OA_TC6_DATA_VALID, +}; + 
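
As a worked example of the header layout defined above, the following standalone sketch builds the control header for reading 2 registers starting at address 0x0008 (STATUS0) of MMS 0. Plain shifts are used instead of the kernel's FIELD_PREP, and the values are chosen purely for illustration; the result, 0x00000803, carries the odd-parity bit in bit 0:

    #include <stdio.h>

    /* Odd parity: bit 0 is set so the 32-bit header has an odd number of
     * 1 bits (same bit-hack as oa_tc6_get_parity() below).
     */
    static unsigned int odd_parity(unsigned int p)
    {
            p ^= p >> 1;
            p ^= p >> 2;
            p = (p & 0x11111111u) * 0x11111111u;
            return !((p >> 28) & 1);
    }

    int main(void)
    {
            unsigned int header = 0;

            /* DNC (bit 31) = 0: control; WNR (bit 29) = 0: read */
            header |= (0u & 0xfu) << 24;            /* MMS 0 */
            header |= (0x0008u & 0xffffu) << 8;     /* ADDR: STATUS0 */
            header |= ((2u - 1) & 0x7fu) << 1;      /* LEN: registers - 1 */
            header |= odd_parity(header);           /* P */

            printf("control header: 0x%08x\n", header);   /* 0x00000803 */
            return 0;
    }
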
+enum oa_tc6_data_start_valid_info { + OA_TC6_DATA_START_INVALID, + OA_TC6_DATA_START_VALID, +}; + +enum oa_tc6_data_end_valid_info { + OA_TC6_DATA_END_INVALID, + OA_TC6_DATA_END_VALID, +}; + +static int oa_tc6_spi_transfer(struct oa_tc6 *tc6, + enum oa_tc6_header_type header_type, u16 length) +{ + struct spi_transfer xfer = { 0 }; + struct spi_message msg; + + if (header_type == OA_TC6_DATA_HEADER) { + xfer.tx_buf = tc6->spi_data_tx_buf; + xfer.rx_buf = tc6->spi_data_rx_buf; + } else { + xfer.tx_buf = tc6->spi_ctrl_tx_buf; + xfer.rx_buf = tc6->spi_ctrl_rx_buf; + } + xfer.len = length; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + return spi_sync(tc6->spi, &msg); +} + +static int oa_tc6_get_parity(u32 p) +{ + /* Public domain code snippet, lifted from + * http://www-graphics.stanford.edu/~seander/bithacks.html + */ + p ^= p >> 1; + p ^= p >> 2; + p = (p & 0x11111111U) * 0x11111111U; + + /* Odd parity is used here */ + return !((p >> 28) & 1); +} + +static __be32 oa_tc6_prepare_ctrl_header(u32 addr, u8 length, + enum oa_tc6_register_op reg_op) +{ + u32 header; + + header = FIELD_PREP(OA_TC6_CTRL_HEADER_DATA_NOT_CTRL, + OA_TC6_CTRL_HEADER) | + FIELD_PREP(OA_TC6_CTRL_HEADER_WRITE_NOT_READ, reg_op) | + FIELD_PREP(OA_TC6_CTRL_HEADER_MEM_MAP_SELECTOR, addr >> 16) | + FIELD_PREP(OA_TC6_CTRL_HEADER_ADDR, addr) | + FIELD_PREP(OA_TC6_CTRL_HEADER_LENGTH, length - 1); + header |= FIELD_PREP(OA_TC6_CTRL_HEADER_PARITY, + oa_tc6_get_parity(header)); + + return cpu_to_be32(header); +} + +static void oa_tc6_update_ctrl_write_data(struct oa_tc6 *tc6, u32 value[], + u8 length) +{ + __be32 *tx_buf = tc6->spi_ctrl_tx_buf + OA_TC6_CTRL_HEADER_SIZE; + + for (int i = 0; i < length; i++) + *tx_buf++ = cpu_to_be32(value[i]); +} + +static u16 oa_tc6_calculate_ctrl_buf_size(u8 length) +{ + /* A control command consists of a 4-byte header + a 4-byte register + * value for each register + a 4-byte ignored value. + */ + return OA_TC6_CTRL_HEADER_SIZE + OA_TC6_CTRL_REG_VALUE_SIZE * length + + OA_TC6_CTRL_IGNORED_SIZE; +} + +static void oa_tc6_prepare_ctrl_spi_buf(struct oa_tc6 *tc6, u32 address, + u32 value[], u8 length, + enum oa_tc6_register_op reg_op) +{ + __be32 *tx_buf = tc6->spi_ctrl_tx_buf; + + *tx_buf = oa_tc6_prepare_ctrl_header(address, length, reg_op); + + if (reg_op == OA_TC6_CTRL_REG_WRITE) + oa_tc6_update_ctrl_write_data(tc6, value, length); +} + +static int oa_tc6_check_ctrl_write_reply(struct oa_tc6 *tc6, u8 size) +{ + u8 *tx_buf = tc6->spi_ctrl_tx_buf; + u8 *rx_buf = tc6->spi_ctrl_rx_buf; + + rx_buf += OA_TC6_CTRL_IGNORED_SIZE; + + /* The echoed control write must match the one that was + * transmitted. + */ + if (memcmp(tx_buf, rx_buf, size - OA_TC6_CTRL_IGNORED_SIZE)) + return -EPROTO; + + return 0; +} + +static int oa_tc6_check_ctrl_read_reply(struct oa_tc6 *tc6, u8 size) +{ + u32 *rx_buf = tc6->spi_ctrl_rx_buf + OA_TC6_CTRL_IGNORED_SIZE; + u32 *tx_buf = tc6->spi_ctrl_tx_buf; + + /* The echoed control read header must match the one that was + * transmitted. + */ + if (*tx_buf != *rx_buf) + return -EPROTO; + + return 0; +}
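
The reply checks above rely on the TC6 control transaction format: the MAC-PHY echoes the host's bytes delayed by one 4-byte word, so the received buffer is the transmitted one shifted by the ignored field. A standalone sketch of that relationship, with made-up buffer contents:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define HDR_SZ  4       /* control header            */
    #define REG_SZ  4       /* one register value        */
    #define PAD_SZ  4       /* ignored (turnaround) word */

    int main(void)
    {
            /* Write of one register: header + value + ignored tail */
            uint8_t tx[HDR_SZ + REG_SZ + PAD_SZ] = {
                    0xa0, 0x01, 0x02, 0x03,         /* made-up header */
                    0xde, 0xad, 0xbe, 0xef,         /* register value */
            };
            uint8_t rx[sizeof(tx)] = { 0 };

            /* The MAC-PHY echoes the host's bytes one word late */
            memcpy(rx + PAD_SZ, tx, sizeof(tx) - PAD_SZ);

            /* ...which is what oa_tc6_check_ctrl_write_reply() verifies */
            assert(!memcmp(tx, rx + PAD_SZ, sizeof(tx) - PAD_SZ));
            return 0;
    }
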
+static void oa_tc6_copy_ctrl_read_data(struct oa_tc6 *tc6, u32 value[], + u8 length) +{ + __be32 *rx_buf = tc6->spi_ctrl_rx_buf + OA_TC6_CTRL_IGNORED_SIZE + + OA_TC6_CTRL_HEADER_SIZE; + + for (int i = 0; i < length; i++) + value[i] = be32_to_cpu(*rx_buf++); +} + +static int oa_tc6_perform_ctrl(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length, enum oa_tc6_register_op reg_op) +{ + u16 size; + int ret; + + /* Prepare control command and copy to SPI control buffer */ + oa_tc6_prepare_ctrl_spi_buf(tc6, address, value, length, reg_op); + + size = oa_tc6_calculate_ctrl_buf_size(length); + + /* Perform SPI transfer */ + ret = oa_tc6_spi_transfer(tc6, OA_TC6_CTRL_HEADER, size); + if (ret) { + dev_err(&tc6->spi->dev, "SPI transfer failed for control: %d\n", + ret); + return ret; + } + + /* Check echoed/received control write command reply for errors */ + if (reg_op == OA_TC6_CTRL_REG_WRITE) + return oa_tc6_check_ctrl_write_reply(tc6, size); + + /* Check echoed/received control read command reply for errors */ + ret = oa_tc6_check_ctrl_read_reply(tc6, size); + if (ret) + return ret; + + oa_tc6_copy_ctrl_read_data(tc6, value, length); + + return 0; +} + +/** + * oa_tc6_read_registers - function for reading multiple consecutive registers. + * @tc6: oa_tc6 struct. + * @address: address of the first register to be read in the MAC-PHY. + * @value: values to be read from the starting register address @address. + * @length: number of consecutive registers to be read from @address. + * + * A maximum of 128 consecutive registers can be read starting at @address. + * + * Return: 0 on success, or a negative error code otherwise. + */ +int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length) +{ + int ret; + + if (!length || length > OA_TC6_CTRL_MAX_REGISTERS) { + dev_err(&tc6->spi->dev, "Invalid register length parameter\n"); + return -EINVAL; + } + + mutex_lock(&tc6->spi_ctrl_lock); + ret = oa_tc6_perform_ctrl(tc6, address, value, length, + OA_TC6_CTRL_REG_READ); + mutex_unlock(&tc6->spi_ctrl_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(oa_tc6_read_registers); + +/** + * oa_tc6_read_register - function for reading a MAC-PHY register. + * @tc6: oa_tc6 struct. + * @address: register address of the MAC-PHY to be read. + * @value: value read from the @address register address of the MAC-PHY. + * + * Return: 0 on success, or a negative error code otherwise. + */ +int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value) +{ + return oa_tc6_read_registers(tc6, address, value, 1); +} +EXPORT_SYMBOL_GPL(oa_tc6_read_register);
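
Consumer MAC-PHY drivers would typically wrap these exported helpers (and the write counterparts below) for their own configuration accesses. A hedged usage sketch; the EXAMPLE_* addresses are taken from the OPEN Alliance MMS 0 register map for illustration and the function itself is hypothetical:

    #include <linux/oa_tc6.h>

    /* Example MMS 0 addresses from the OPEN Alliance spec */
    #define EXAMPLE_REG_STDCAP      0x0002
    #define EXAMPLE_REG_CONFIG0     0x0004

    /* Illustrative only: one single-register and one multi-register read */
    static int example_dump_caps(struct oa_tc6 *tc6)
    {
            u32 stdcap, cfg[2];
            int ret;

            ret = oa_tc6_read_register(tc6, EXAMPLE_REG_STDCAP, &stdcap);
            if (ret)
                    return ret;

            /* CONFIG0 and the register after it, in one SPI transaction */
            return oa_tc6_read_registers(tc6, EXAMPLE_REG_CONFIG0, cfg, 2);
    }
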
+/** + * oa_tc6_write_registers - function for writing multiple consecutive registers. + * @tc6: oa_tc6 struct. + * @address: address of the first register to be written in the MAC-PHY. + * @value: values to be written from the starting register address @address. + * @length: number of consecutive registers to be written from @address. + * + * A maximum of 128 consecutive registers can be written starting at @address. + * + * Return: 0 on success, or a negative error code otherwise. + */ +int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length) +{ + int ret; + + if (!length || length > OA_TC6_CTRL_MAX_REGISTERS) { + dev_err(&tc6->spi->dev, "Invalid register length parameter\n"); + return -EINVAL; + } + + mutex_lock(&tc6->spi_ctrl_lock); + ret = oa_tc6_perform_ctrl(tc6, address, value, length, + OA_TC6_CTRL_REG_WRITE); + mutex_unlock(&tc6->spi_ctrl_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(oa_tc6_write_registers); + +/** + * oa_tc6_write_register - function for writing a MAC-PHY register. + * @tc6: oa_tc6 struct. + * @address: register address of the MAC-PHY to be written. + * @value: value to be written at the @address register address of the MAC-PHY. + * + * Return: 0 on success, or a negative error code otherwise. + */ +int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value) +{ + return oa_tc6_write_registers(tc6, address, &value, 1); +} +EXPORT_SYMBOL_GPL(oa_tc6_write_register); + +static int oa_tc6_check_phy_reg_direct_access_capability(struct oa_tc6 *tc6) +{ + u32 regval; + int ret; + + ret = oa_tc6_read_register(tc6, OA_TC6_REG_STDCAP, &regval); + if (ret) + return ret; + + if (!(regval & STDCAP_DIRECT_PHY_REG_ACCESS)) + return -ENODEV; + + return 0; +} + +static void oa_tc6_handle_link_change(struct net_device *netdev) +{ + phy_print_status(netdev->phydev); +} + +static int oa_tc6_mdiobus_read(struct mii_bus *bus, int addr, int regnum) +{ + struct oa_tc6 *tc6 = bus->priv; + u32 regval; + int ret; + + ret = oa_tc6_read_register(tc6, OA_TC6_PHY_STD_REG_ADDR_BASE | + (regnum & OA_TC6_PHY_STD_REG_ADDR_MASK), + &regval); + if (ret) + return ret; + + return regval; +} + +static int oa_tc6_mdiobus_write(struct mii_bus *bus, int addr, int regnum, + u16 val) +{ + struct oa_tc6 *tc6 = bus->priv; + + return oa_tc6_write_register(tc6, OA_TC6_PHY_STD_REG_ADDR_BASE | + (regnum & OA_TC6_PHY_STD_REG_ADDR_MASK), + val); +} + +static int oa_tc6_get_phy_c45_mms(int devnum) +{ + switch (devnum) { + case MDIO_MMD_PCS: + return OA_TC6_PHY_C45_PCS_MMS2; + case MDIO_MMD_PMAPMD: + return OA_TC6_PHY_C45_PMA_PMD_MMS3; + case MDIO_MMD_VEND2: + return OA_TC6_PHY_C45_VS_PLCA_MMS4; + case MDIO_MMD_AN: + return OA_TC6_PHY_C45_AUTO_NEG_MMS5; + case MDIO_MMD_POWER_UNIT: + return OA_TC6_PHY_C45_POWER_UNIT_MMS6; + default: + return -EOPNOTSUPP; + } +} + +static int oa_tc6_mdiobus_read_c45(struct mii_bus *bus, int addr, int devnum, + int regnum) +{ + struct oa_tc6 *tc6 = bus->priv; + u32 regval; + int ret; + + ret = oa_tc6_get_phy_c45_mms(devnum); + if (ret < 0) + return ret; + + ret = oa_tc6_read_register(tc6, (ret << 16) | regnum, &regval); + if (ret) + return ret; + + return regval; +} + +static int oa_tc6_mdiobus_write_c45(struct mii_bus *bus, int addr, int devnum, + int regnum, u16 val) +{ + struct oa_tc6 *tc6 = bus->priv; + int ret; + + ret = oa_tc6_get_phy_c45_mms(devnum); + if (ret < 0) + return ret; + + return oa_tc6_write_register(tc6, (ret << 16) | regnum, val); +} + +static int oa_tc6_mdiobus_register(struct oa_tc6 *tc6) +{ + int ret; + + tc6->mdiobus = mdiobus_alloc(); + if (!tc6->mdiobus) { + netdev_err(tc6->netdev, "MDIO bus alloc failed\n"); + return -ENOMEM; + } + + tc6->mdiobus->priv = tc6; + tc6->mdiobus->read = oa_tc6_mdiobus_read; + tc6->mdiobus->write = oa_tc6_mdiobus_write; + /* OPEN Alliance 10BASE-T1x compliant MAC-PHYs have both C22 and C45 + * register spaces. If the PHY is discovered via the C22 bus protocol, + * phylib assumes C22 and always accesses C45 registers through C22 + * indirect access. This is because we don't have a clean separation + * between the C22/C45 register spaces and the C22/C45 MDIO bus + * protocols. As a result, direct access to the PHY's C45 registers, + * which would save multiple SPI bus accesses, can't be used. To + * support direct access anyway, PHY drivers can set + * .read_mmd/.write_mmd to call .read_c45/.write_c45. + * Ex: drivers/net/phy/microchip_t1s.c + */
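
Both MDIO paths collapse onto the same 32-bit control address: the upper 16 bits select a memory map, the lower 16 a register. A standalone sketch of the two translations used above (the 0xFF00 base and the table-6 MMS numbers as assumed by this file):

    #include <stdint.h>

    /* Clause 22: PHY registers 0..31 live at 0xFF00..0xFF1F in MMS 0 */
    static uint32_t c22_to_tc6_addr(unsigned int regnum)
    {
            return 0xFF00 | (regnum & 0x1F);
    }

    /* Clause 45: the MMS chosen for the MMD becomes the upper 16 bits,
     * e.g. MMD 31 (vendor specific / PLCA) -> MMS 4.
     */
    static uint32_t c45_to_tc6_addr(unsigned int mms, unsigned int regnum)
    {
            return (uint32_t)mms << 16 | (regnum & 0xFFFF);
    }
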
+ tc6->mdiobus->read_c45 = oa_tc6_mdiobus_read_c45; + tc6->mdiobus->write_c45 = oa_tc6_mdiobus_write_c45; + tc6->mdiobus->name = "oa-tc6-mdiobus"; + tc6->mdiobus->parent = tc6->dev; + + snprintf(tc6->mdiobus->id, ARRAY_SIZE(tc6->mdiobus->id), "%s", + dev_name(&tc6->spi->dev)); + + ret = mdiobus_register(tc6->mdiobus); + if (ret) { + netdev_err(tc6->netdev, "Could not register MDIO bus\n"); + mdiobus_free(tc6->mdiobus); + return ret; + } + + return 0; +} + +static void oa_tc6_mdiobus_unregister(struct oa_tc6 *tc6) +{ + mdiobus_unregister(tc6->mdiobus); + mdiobus_free(tc6->mdiobus); +} + +static int oa_tc6_phy_init(struct oa_tc6 *tc6) +{ + int ret; + + ret = oa_tc6_check_phy_reg_direct_access_capability(tc6); + if (ret) { + netdev_err(tc6->netdev, + "Direct PHY register access is not supported by the MAC-PHY\n"); + return ret; + } + + ret = oa_tc6_mdiobus_register(tc6); + if (ret) + return ret; + + tc6->phydev = phy_find_first(tc6->mdiobus); + if (!tc6->phydev) { + netdev_err(tc6->netdev, "No PHY found\n"); + oa_tc6_mdiobus_unregister(tc6); + return -ENODEV; + } + + tc6->phydev->is_internal = true; + ret = phy_connect_direct(tc6->netdev, tc6->phydev, + &oa_tc6_handle_link_change, + PHY_INTERFACE_MODE_INTERNAL); + if (ret) { + netdev_err(tc6->netdev, "Can't attach PHY to %s\n", + tc6->mdiobus->id); + oa_tc6_mdiobus_unregister(tc6); + return ret; + } + + phy_attached_info(tc6->netdev->phydev); + + return 0; +} + +static void oa_tc6_phy_exit(struct oa_tc6 *tc6) +{ + phy_disconnect(tc6->phydev); + oa_tc6_mdiobus_unregister(tc6); +} + +static int oa_tc6_read_status0(struct oa_tc6 *tc6) +{ + u32 regval; + int ret; + + ret = oa_tc6_read_register(tc6, OA_TC6_REG_STATUS0, &regval); + if (ret) { + dev_err(&tc6->spi->dev, "STATUS0 register read failed: %d\n", + ret); + return 0; + } + + return regval; +} + +static int oa_tc6_sw_reset_macphy(struct oa_tc6 *tc6) +{ + u32 regval = RESET_SWRESET; + int ret; + + ret = oa_tc6_write_register(tc6, OA_TC6_REG_RESET, regval); + if (ret) + return ret; + + /* Poll for software reset completion every 1ms, up to a 1s timeout */ + ret = readx_poll_timeout(oa_tc6_read_status0, tc6, regval, + regval & STATUS0_RESETC, + STATUS0_RESETC_POLL_DELAY, + STATUS0_RESETC_POLL_TIMEOUT); + if (ret) + return -ENODEV; + + /* Clear the reset complete status */ + return oa_tc6_write_register(tc6, OA_TC6_REG_STATUS0, regval); +} + +static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6) +{ + u32 regval; + int ret; + + ret = oa_tc6_read_register(tc6, OA_TC6_REG_INT_MASK0, &regval); + if (ret) + return ret; + + regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK | + INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK | + INT_MASK0_LOSS_OF_FRAME_ERR_MASK | + INT_MASK0_HEADER_ERR_MASK); + + return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval); +} + +static int oa_tc6_enable_data_transfer(struct oa_tc6 *tc6) +{ + u32 value; + int ret; + + ret = oa_tc6_read_register(tc6, OA_TC6_REG_CONFIG0, &value); + if (ret) + return ret; + + /* Enable configuration synchronization for data transfer */ + value |= CONFIG0_SYNC; + + return oa_tc6_write_register(tc6, OA_TC6_REG_CONFIG0, value); +} +
+static void oa_tc6_cleanup_ongoing_rx_skb(struct oa_tc6 *tc6) +{ + if (tc6->rx_skb) { + tc6->netdev->stats.rx_dropped++; + kfree_skb(tc6->rx_skb); + tc6->rx_skb = NULL; + } +} + +static void oa_tc6_cleanup_ongoing_tx_skb(struct oa_tc6 *tc6) +{ + if (tc6->ongoing_tx_skb) { + tc6->netdev->stats.tx_dropped++; + kfree_skb(tc6->ongoing_tx_skb); + tc6->ongoing_tx_skb = NULL; + } +} + +static int oa_tc6_process_extended_status(struct oa_tc6 *tc6) +{ + u32 value; + int ret; + + ret = oa_tc6_read_register(tc6, OA_TC6_REG_STATUS0, &value); + if (ret) { + netdev_err(tc6->netdev, "STATUS0 register read failed: %d\n", + ret); + return ret; + } + + /* Clear the error interrupts status */ + ret = oa_tc6_write_register(tc6, OA_TC6_REG_STATUS0, value); + if (ret) { + netdev_err(tc6->netdev, "STATUS0 register write failed: %d\n", + ret); + return ret; + } + + if (FIELD_GET(STATUS0_RX_BUFFER_OVERFLOW_ERROR, value)) { + tc6->rx_buf_overflow = true; + oa_tc6_cleanup_ongoing_rx_skb(tc6); + net_err_ratelimited("%s: Receive buffer overflow error\n", + tc6->netdev->name); + return -EAGAIN; + } + if (FIELD_GET(STATUS0_TX_PROTOCOL_ERROR, value)) { + netdev_err(tc6->netdev, "Transmit protocol error\n"); + return -ENODEV; + } + /* TODO: Currently loss of frame and header errors are treated as + * non-recoverable errors. They will be handled in the next version. + */ + if (FIELD_GET(STATUS0_LOSS_OF_FRAME_ERROR, value)) { + netdev_err(tc6->netdev, "Loss of frame error\n"); + return -ENODEV; + } + if (FIELD_GET(STATUS0_HEADER_ERROR, value)) { + netdev_err(tc6->netdev, "Header error\n"); + return -ENODEV; + } + + return 0; +} + +static int oa_tc6_process_rx_chunk_footer(struct oa_tc6 *tc6, u32 footer) +{ + /* Process the rx chunk footer for the following: + * 1. tx credits + * 2. errors, if any, from the MAC-PHY + * 3. receive chunks available + */ + tc6->tx_credits = FIELD_GET(OA_TC6_DATA_FOOTER_TX_CREDITS, footer); + tc6->rx_chunks_available = FIELD_GET(OA_TC6_DATA_FOOTER_RX_CHUNKS, + footer); + + if (FIELD_GET(OA_TC6_DATA_FOOTER_EXTENDED_STS, footer)) { + int ret = oa_tc6_process_extended_status(tc6); + + if (ret) + return ret; + } + + /* TODO: Currently received header bad and configuration unsync errors + * are treated as non-recoverable errors. They will be handled in the + * next version. + */ + if (FIELD_GET(OA_TC6_DATA_FOOTER_RXD_HEADER_BAD, footer)) { + netdev_err(tc6->netdev, "Rxd header bad error\n"); + return -ENODEV; + } + + if (!FIELD_GET(OA_TC6_DATA_FOOTER_CONFIG_SYNC, footer)) { + netdev_err(tc6->netdev, "Config unsync error\n"); + return -ENODEV; + } + + return 0; +}
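
Every received chunk ends with this 32-bit footer. A standalone sketch that decodes just the fields consumed above, using the bit positions from the masks defined earlier in the file:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct footer_info {
            bool extended_sts;      /* bit 31: poll STATUS0 for details */
            bool hdrb;              /* bit 30: our data header was bad  */
            bool sync;              /* bit 29: configuration is in sync */
            unsigned int rca;       /* bits 28:24: rx chunks available  */
            unsigned int txc;       /* bits 5:1: tx credits available   */
    };

    static struct footer_info decode_footer(uint32_t f)
    {
            return (struct footer_info){
                    .extended_sts = f >> 31 & 1,
                    .hdrb = f >> 30 & 1,
                    .sync = f >> 29 & 1,
                    .rca = f >> 24 & 0x1f,
                    .txc = f >> 1 & 0x1f,
            };
    }

    int main(void)
    {
            struct footer_info fi = decode_footer(0x20000008);

            printf("sync=%d rca=%u txc=%u\n", fi.sync, fi.rca, fi.txc);
            return 0;
    }
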
+static void oa_tc6_submit_rx_skb(struct oa_tc6 *tc6) +{ + tc6->rx_skb->protocol = eth_type_trans(tc6->rx_skb, tc6->netdev); + tc6->netdev->stats.rx_packets++; + tc6->netdev->stats.rx_bytes += tc6->rx_skb->len; + + netif_rx(tc6->rx_skb); + + tc6->rx_skb = NULL; +} + +static void oa_tc6_update_rx_skb(struct oa_tc6 *tc6, u8 *payload, u8 length) +{ + memcpy(skb_put(tc6->rx_skb, length), payload, length); +} + +static int oa_tc6_allocate_rx_skb(struct oa_tc6 *tc6) +{ + tc6->rx_skb = netdev_alloc_skb_ip_align(tc6->netdev, tc6->netdev->mtu + + ETH_HLEN + ETH_FCS_LEN); + if (!tc6->rx_skb) { + tc6->netdev->stats.rx_dropped++; + return -ENOMEM; + } + + return 0; +} + +static int oa_tc6_prcs_complete_rx_frame(struct oa_tc6 *tc6, u8 *payload, + u16 size) +{ + int ret; + + ret = oa_tc6_allocate_rx_skb(tc6); + if (ret) + return ret; + + oa_tc6_update_rx_skb(tc6, payload, size); + + oa_tc6_submit_rx_skb(tc6); + + return 0; +} + +static int oa_tc6_prcs_rx_frame_start(struct oa_tc6 *tc6, u8 *payload, u16 size) +{ + int ret; + + ret = oa_tc6_allocate_rx_skb(tc6); + if (ret) + return ret; + + oa_tc6_update_rx_skb(tc6, payload, size); + + return 0; +} + +static void oa_tc6_prcs_rx_frame_end(struct oa_tc6 *tc6, u8 *payload, u16 size) +{ + oa_tc6_update_rx_skb(tc6, payload, size); + + oa_tc6_submit_rx_skb(tc6); +} + +static void oa_tc6_prcs_ongoing_rx_frame(struct oa_tc6 *tc6, u8 *payload, + u32 footer) +{ + oa_tc6_update_rx_skb(tc6, payload, OA_TC6_CHUNK_PAYLOAD_SIZE); +} + +static int oa_tc6_prcs_rx_chunk_payload(struct oa_tc6 *tc6, u8 *data, + u32 footer) +{ + u8 start_byte_offset = FIELD_GET(OA_TC6_DATA_FOOTER_START_WORD_OFFSET, + footer) * sizeof(u32); + u8 end_byte_offset = FIELD_GET(OA_TC6_DATA_FOOTER_END_BYTE_OFFSET, + footer); + bool start_valid = FIELD_GET(OA_TC6_DATA_FOOTER_START_VALID, footer); + bool end_valid = FIELD_GET(OA_TC6_DATA_FOOTER_END_VALID, footer); + u16 size; + + /* Restart the rx frame after an rx buffer overflow error */ + if (start_valid && tc6->rx_buf_overflow) + tc6->rx_buf_overflow = false; + + if (tc6->rx_buf_overflow) + return 0; + + /* Process the chunk with a complete rx frame */ + if (start_valid && end_valid && start_byte_offset < end_byte_offset) { + size = end_byte_offset + 1 - start_byte_offset; + return oa_tc6_prcs_complete_rx_frame(tc6, + &data[start_byte_offset], + size); + } + + /* Process the chunk with only an rx frame start */ + if (start_valid && !end_valid) { + size = OA_TC6_CHUNK_PAYLOAD_SIZE - start_byte_offset; + return oa_tc6_prcs_rx_frame_start(tc6, + &data[start_byte_offset], + size); + } + + /* Process the chunk with only an rx frame end */ + if (end_valid && !start_valid) { + size = end_byte_offset + 1; + oa_tc6_prcs_rx_frame_end(tc6, data, size); + return 0; + } + + /* Process the chunk with the previous rx frame end and the next rx + * frame start. + */ + if (start_valid && end_valid && start_byte_offset > end_byte_offset) { + /* After an rx buffer overflow error, it is possible to receive + * the end of a previously incomplete rx frame along with the + * start of a new rx frame. + */ + if (tc6->rx_skb) { + size = end_byte_offset + 1; + oa_tc6_prcs_rx_frame_end(tc6, data, size); + } + size = OA_TC6_CHUNK_PAYLOAD_SIZE - start_byte_offset; + return oa_tc6_prcs_rx_frame_start(tc6, + &data[start_byte_offset], + size); + } + + /* Process the chunk with ongoing rx frame data */ + oa_tc6_prcs_ongoing_rx_frame(tc6, data, footer); + + return 0; +}
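
The start/end flags and offsets admit five chunk shapes, which the function above tests in order. A standalone restatement of that dispatch (hypothetical helper, mirroring the driver's conditions):

    #include <stdbool.h>

    enum chunk_shape {
            CHUNK_WHOLE_FRAME,      /* start and end, start before end */
            CHUNK_FRAME_START,      /* start flag only                 */
            CHUNK_FRAME_END,        /* end flag only                   */
            CHUNK_END_THEN_START,   /* end of one frame, start of next */
            CHUNK_FRAME_MIDDLE,     /* no flags: 64 bytes of payload   */
    };

    /* Mirrors the order of the checks in oa_tc6_prcs_rx_chunk_payload() */
    static enum chunk_shape classify_chunk(bool start_valid, bool end_valid,
                                           unsigned int start_off,
                                           unsigned int end_off)
    {
            if (start_valid && end_valid && start_off < end_off)
                    return CHUNK_WHOLE_FRAME;
            if (start_valid && !end_valid)
                    return CHUNK_FRAME_START;
            if (end_valid && !start_valid)
                    return CHUNK_FRAME_END;
            if (start_valid && end_valid && start_off > end_off)
                    return CHUNK_END_THEN_START;
            return CHUNK_FRAME_MIDDLE;
    }
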
+static u32 oa_tc6_get_rx_chunk_footer(struct oa_tc6 *tc6, u16 footer_offset) +{ + u8 *rx_buf = tc6->spi_data_rx_buf; + __be32 footer; + + footer = *((__be32 *)&rx_buf[footer_offset]); + + return be32_to_cpu(footer); +} + +static int oa_tc6_process_spi_data_rx_buf(struct oa_tc6 *tc6, u16 length) +{ + u16 no_of_rx_chunks = length / OA_TC6_CHUNK_SIZE; + u32 footer; + int ret; + + /* All the rx chunks in the receive SPI data buffer are examined here */ + for (int i = 0; i < no_of_rx_chunks; i++) { + /* The last 4 bytes of each received chunk carry the footer */ + footer = oa_tc6_get_rx_chunk_footer(tc6, i * OA_TC6_CHUNK_SIZE + + OA_TC6_CHUNK_PAYLOAD_SIZE); + + ret = oa_tc6_process_rx_chunk_footer(tc6, footer); + if (ret) + return ret; + + /* If a chunk has its data valid flag set, process it for the + * information needed to determine the validity and the location + * of the received frame data. + */ + if (FIELD_GET(OA_TC6_DATA_FOOTER_DATA_VALID, footer)) { + u8 *payload = tc6->spi_data_rx_buf + i * + OA_TC6_CHUNK_SIZE; + + ret = oa_tc6_prcs_rx_chunk_payload(tc6, payload, + footer); + if (ret) + return ret; + } + } + + return 0; +} + +static __be32 oa_tc6_prepare_data_header(bool data_valid, bool start_valid, + bool end_valid, u8 end_byte_offset) +{ + u32 header = FIELD_PREP(OA_TC6_DATA_HEADER_DATA_NOT_CTRL, + OA_TC6_DATA_HEADER) | + FIELD_PREP(OA_TC6_DATA_HEADER_DATA_VALID, data_valid) | + FIELD_PREP(OA_TC6_DATA_HEADER_START_VALID, start_valid) | + FIELD_PREP(OA_TC6_DATA_HEADER_END_VALID, end_valid) | + FIELD_PREP(OA_TC6_DATA_HEADER_END_BYTE_OFFSET, + end_byte_offset); + + header |= FIELD_PREP(OA_TC6_DATA_HEADER_PARITY, + oa_tc6_get_parity(header)); + + return cpu_to_be32(header); +} + +static void oa_tc6_add_tx_skb_to_spi_buf(struct oa_tc6 *tc6) +{ + enum oa_tc6_data_end_valid_info end_valid = OA_TC6_DATA_END_INVALID; + __be32 *tx_buf = tc6->spi_data_tx_buf + tc6->spi_data_tx_buf_offset; + u16 remaining_len = tc6->ongoing_tx_skb->len - tc6->tx_skb_offset; + u8 *tx_skb_data = tc6->ongoing_tx_skb->data + tc6->tx_skb_offset; + enum oa_tc6_data_start_valid_info start_valid; + u8 end_byte_offset = 0; + u16 length_to_copy; + + /* The initial value is assigned here instead of at the declaration to + * keep the declaration line within 80 characters. + */ + start_valid = OA_TC6_DATA_START_INVALID; + + /* Set start valid if the current tx chunk contains the start of the tx + * ethernet frame. + */ + if (!tc6->tx_skb_offset) + start_valid = OA_TC6_DATA_START_VALID; + + /* If the remaining tx skb length is more than the chunk payload size of + * 64 bytes, copy only 64 bytes and leave the ongoing tx skb for the + * next tx chunk. + */ + length_to_copy = min_t(u16, remaining_len, OA_TC6_CHUNK_PAYLOAD_SIZE); + + /* Copy the tx skb data to the tx chunk payload buffer */ + memcpy(tx_buf + 1, tx_skb_data, length_to_copy); + tc6->tx_skb_offset += length_to_copy; + + /* Set end valid if the current tx chunk contains the end of the tx + * ethernet frame. + */ + if (tc6->ongoing_tx_skb->len == tc6->tx_skb_offset) { + end_valid = OA_TC6_DATA_END_VALID; + end_byte_offset = length_to_copy - 1; + tc6->tx_skb_offset = 0; + tc6->netdev->stats.tx_bytes += tc6->ongoing_tx_skb->len; + tc6->netdev->stats.tx_packets++; + kfree_skb(tc6->ongoing_tx_skb); + tc6->ongoing_tx_skb = NULL; + } + + *tx_buf = oa_tc6_prepare_data_header(OA_TC6_DATA_VALID, start_valid, + end_valid, end_byte_offset); + tc6->spi_data_tx_buf_offset += OA_TC6_CHUNK_SIZE; +}
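
As a concrete check of the arithmetic above: a 100-byte frame occupies two tx chunks, the first carrying only a start flag, the second an end flag with end byte offset 35 (the index of the frame's last valid byte in that chunk). A standalone sketch that asserts exactly that:

    #include <assert.h>

    #define CHUNK_PAYLOAD   64

    int main(void)
    {
            unsigned int frame_len = 100, sent = 0, chunk = 0;

            while (sent < frame_len) {
                    unsigned int copy = frame_len - sent;
                    unsigned int sv, ev, ebo = 0;

                    if (copy > CHUNK_PAYLOAD)
                            copy = CHUNK_PAYLOAD;

                    sv = (sent == 0);                 /* frame starts here */
                    ev = (sent + copy == frame_len);  /* frame ends here   */
                    if (ev)
                            ebo = copy - 1;  /* index of last valid byte */

                    if (chunk == 0)
                            assert(sv && !ev);
                    if (chunk == 1)
                            assert(!sv && ev && ebo == 35);

                    sent += copy;
                    chunk++;
            }
            assert(chunk == 2);
            return 0;
    }
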
+static u16 oa_tc6_prepare_spi_tx_buf_for_tx_skbs(struct oa_tc6 *tc6) +{ + u16 used_tx_credits; + + /* Get tx skbs and convert them into tx chunks based on the tx credits + * available. + */ + for (used_tx_credits = 0; used_tx_credits < tc6->tx_credits; + used_tx_credits++) { + if (!tc6->ongoing_tx_skb) { + tc6->ongoing_tx_skb = tc6->waiting_tx_skb; + tc6->waiting_tx_skb = NULL; + } + if (!tc6->ongoing_tx_skb) + break; + oa_tc6_add_tx_skb_to_spi_buf(tc6); + } + + return used_tx_credits * OA_TC6_CHUNK_SIZE; +} + +static void oa_tc6_add_empty_chunks_to_spi_buf(struct oa_tc6 *tc6, + u16 needed_empty_chunks) +{ + __be32 header; + + header = oa_tc6_prepare_data_header(OA_TC6_DATA_INVALID, + OA_TC6_DATA_START_INVALID, + OA_TC6_DATA_END_INVALID, 0); + + while (needed_empty_chunks--) { + __be32 *tx_buf = tc6->spi_data_tx_buf + + tc6->spi_data_tx_buf_offset; + + *tx_buf = header; + tc6->spi_data_tx_buf_offset += OA_TC6_CHUNK_SIZE; + } +} + +static u16 oa_tc6_prepare_spi_tx_buf_for_rx_chunks(struct oa_tc6 *tc6, u16 len) +{ + u16 tx_chunks = len / OA_TC6_CHUNK_SIZE; + u16 needed_empty_chunks; + + /* If there are more chunks to receive than to transmit, we need to add + * enough empty tx chunks to allow the reception of the excess rx + * chunks. + */ + if (tx_chunks >= tc6->rx_chunks_available) + return len; + + needed_empty_chunks = tc6->rx_chunks_available - tx_chunks; + + oa_tc6_add_empty_chunks_to_spi_buf(tc6, needed_empty_chunks); + + return needed_empty_chunks * OA_TC6_CHUNK_SIZE + len; +} + +static int oa_tc6_try_spi_transfer(struct oa_tc6 *tc6) +{ + int ret; + + while (true) { + u16 spi_len = 0; + + tc6->spi_data_tx_buf_offset = 0; + + if (tc6->ongoing_tx_skb || tc6->waiting_tx_skb) + spi_len = oa_tc6_prepare_spi_tx_buf_for_tx_skbs(tc6); + + spi_len = oa_tc6_prepare_spi_tx_buf_for_rx_chunks(tc6, spi_len); + + if (tc6->int_flag) { + tc6->int_flag = false; + if (spi_len == 0) { + oa_tc6_add_empty_chunks_to_spi_buf(tc6, 1); + spi_len = OA_TC6_CHUNK_SIZE; + } + } + + if (spi_len == 0) + break; + + ret = oa_tc6_spi_transfer(tc6, OA_TC6_DATA_HEADER, spi_len); + if (ret) { + netdev_err(tc6->netdev, "SPI data transfer failed: %d\n", + ret); + return ret; + } + + ret = oa_tc6_process_spi_data_rx_buf(tc6, spi_len); + if (ret) { + if (ret == -EAGAIN) + continue; + + oa_tc6_cleanup_ongoing_tx_skb(tc6); + oa_tc6_cleanup_ongoing_rx_skb(tc6); + netdev_err(tc6->netdev, "Device error: %d\n", ret); + return ret; + } + + if (!tc6->waiting_tx_skb && netif_queue_stopped(tc6->netdev)) + netif_wake_queue(tc6->netdev); + } + + return 0; +} + +static int oa_tc6_spi_thread_handler(void *data) +{ + struct oa_tc6 *tc6 = data; + int ret; + + while (likely(!kthread_should_stop())) { + /* This kthread is woken up when there is a tx skb to transmit + * or a MAC-PHY interrupt to service, and then performs an SPI + * transfer of tx chunks. + */ + wait_event_interruptible(tc6->spi_wq, tc6->waiting_tx_skb || + tc6->int_flag || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + + ret = oa_tc6_try_spi_transfer(tc6); + if (ret) + return ret; + } + + return 0; +}
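
The resulting transfer length from the helpers above is the larger of what the tx credits allow and what the pending rx chunks require, in 68-byte chunk units. A standalone restatement of that sizing (hypothetical helpers):

    /* Chunk accounting sketch; 68 = 4-byte header + 64-byte payload */
    #define EXAMPLE_CHUNK_SIZE      68

    /* Tx chunks that may be sent now, bounded by the MAC-PHY's credits */
    static unsigned int example_tx_chunks(unsigned int pending_chunks,
                                          unsigned int tx_credits)
    {
            return pending_chunks < tx_credits ? pending_chunks : tx_credits;
    }

    /* Pad with empty tx chunks so every pending rx chunk can be clocked
     * in during the same full-duplex transfer.
     */
    static unsigned int example_spi_len(unsigned int tx_chunks,
                                        unsigned int rx_chunks_available)
    {
            unsigned int total = tx_chunks > rx_chunks_available ?
                                 tx_chunks : rx_chunks_available;

            return total * EXAMPLE_CHUNK_SIZE;
    }
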
+static int oa_tc6_update_buffer_status_from_register(struct oa_tc6 *tc6) +{ + u32 value; + int ret; + + /* Initially, the tx credits and rx chunks available are read from the + * register, since no data transfer has been performed yet. Later they + * are updated from the rx footer. + */ + ret = oa_tc6_read_register(tc6, OA_TC6_REG_BUFFER_STATUS, &value); + if (ret) + return ret; + + tc6->tx_credits = FIELD_GET(BUFFER_STATUS_TX_CREDITS_AVAILABLE, value); + tc6->rx_chunks_available = FIELD_GET(BUFFER_STATUS_RX_CHUNKS_AVAILABLE, + value); + + return 0; +} + +static irqreturn_t oa_tc6_macphy_isr(int irq, void *data) +{ + struct oa_tc6 *tc6 = data; + + /* A MAC-PHY interrupt can occur for the following reasons. + * - availability of tx credits if it was 0 before and not reported in + * the previous rx footer. + * - availability of rx chunks if it was 0 before and not reported in + * the previous rx footer. + * - extended status event not reported in the previous rx footer. + */ + tc6->int_flag = true; + /* Wake the spi kthread to perform an spi transfer */ + wake_up_interruptible(&tc6->spi_wq); + + return IRQ_HANDLED; +} + +/** + * oa_tc6_zero_align_receive_frame_enable - function to enable zero align + * receive frame feature. + * @tc6: oa_tc6 struct. + * + * Return: 0 on success, or a negative error code otherwise. + */ +int oa_tc6_zero_align_receive_frame_enable(struct oa_tc6 *tc6) +{ + u32 regval; + int ret; + + ret = oa_tc6_read_register(tc6, OA_TC6_REG_CONFIG0, &regval); + if (ret) + return ret; + + /* Set Zero-Align Receive Frame Enable */ + regval |= CONFIG0_ZARFE_ENABLE; + + return oa_tc6_write_register(tc6, OA_TC6_REG_CONFIG0, regval); +} +EXPORT_SYMBOL_GPL(oa_tc6_zero_align_receive_frame_enable); + +/** + * oa_tc6_start_xmit - function for sending the tx skb, which contains an + * ethernet frame. + * @tc6: oa_tc6 struct. + * @skb: socket buffer in which the ethernet frame is stored. + * + * Return: NETDEV_TX_OK if the transmit skb was accepted for transmission, + * otherwise NETDEV_TX_BUSY. + */ +netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb) +{ + if (tc6->waiting_tx_skb) { + netif_stop_queue(tc6->netdev); + return NETDEV_TX_BUSY; + } + + if (skb_linearize(skb)) { + dev_kfree_skb_any(skb); + tc6->netdev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + tc6->waiting_tx_skb = skb; + + /* Wake the spi kthread to perform an spi transfer */ + wake_up_interruptible(&tc6->spi_wq); + + return NETDEV_TX_OK; +} +EXPORT_SYMBOL_GPL(oa_tc6_start_xmit);
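
A MAC driver built on this library can route its transmit path straight through to the helper above. An illustrative sketch of the glue; the private struct and function names are hypothetical, loosely following how a consumer driver would wire it up:

    #include <linux/netdevice.h>
    #include <linux/oa_tc6.h>

    /* Hypothetical consumer driver private data */
    struct example_priv {
            struct oa_tc6 *tc6;
    };

    /* .ndo_start_xmit: hand the skb to the library; it stops the queue
     * and returns NETDEV_TX_BUSY while its single-skb slot is occupied.
     */
    static netdev_tx_t example_start_xmit(struct sk_buff *skb,
                                          struct net_device *netdev)
    {
            struct example_priv *priv = netdev_priv(netdev);

            return oa_tc6_start_xmit(priv->tc6, skb);
    }
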
+/** + * oa_tc6_init - allocates and initializes the oa_tc6 structure. + * @spi: device with which data will be exchanged. + * @netdev: network device interface structure. + * + * Return: pointer to the oa_tc6 structure if the MAC-PHY initialization + * succeeds, otherwise NULL. + */ +struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev) +{ + struct oa_tc6 *tc6; + int ret; + + tc6 = devm_kzalloc(&spi->dev, sizeof(*tc6), GFP_KERNEL); + if (!tc6) + return NULL; + + tc6->spi = spi; + tc6->netdev = netdev; + SET_NETDEV_DEV(netdev, &spi->dev); + mutex_init(&tc6->spi_ctrl_lock); + + /* Set the SPI controller to pump at realtime priority */ + tc6->spi->rt = true; + spi_setup(tc6->spi); + + tc6->spi_ctrl_tx_buf = devm_kzalloc(&tc6->spi->dev, + OA_TC6_CTRL_SPI_BUF_SIZE, + GFP_KERNEL); + if (!tc6->spi_ctrl_tx_buf) + return NULL; + + tc6->spi_ctrl_rx_buf = devm_kzalloc(&tc6->spi->dev, + OA_TC6_CTRL_SPI_BUF_SIZE, + GFP_KERNEL); + if (!tc6->spi_ctrl_rx_buf) + return NULL; + + tc6->spi_data_tx_buf = devm_kzalloc(&tc6->spi->dev, + OA_TC6_SPI_DATA_BUF_SIZE, + GFP_KERNEL); + if (!tc6->spi_data_tx_buf) + return NULL; + + tc6->spi_data_rx_buf = devm_kzalloc(&tc6->spi->dev, + OA_TC6_SPI_DATA_BUF_SIZE, + GFP_KERNEL); + if (!tc6->spi_data_rx_buf) + return NULL; + + ret = oa_tc6_sw_reset_macphy(tc6); + if (ret) { + dev_err(&tc6->spi->dev, + "MAC-PHY software reset failed: %d\n", ret); + return NULL; + } + + ret = oa_tc6_unmask_macphy_error_interrupts(tc6); + if (ret) { + dev_err(&tc6->spi->dev, + "MAC-PHY error interrupts unmask failed: %d\n", ret); + return NULL; + } + + ret = oa_tc6_phy_init(tc6); + if (ret) { + dev_err(&tc6->spi->dev, + "MAC internal PHY initialization failed: %d\n", ret); + return NULL; + } + + ret = oa_tc6_enable_data_transfer(tc6); + if (ret) { + dev_err(&tc6->spi->dev, "Failed to enable data transfer: %d\n", + ret); + goto phy_exit; + } + + ret = oa_tc6_update_buffer_status_from_register(tc6); + if (ret) { + dev_err(&tc6->spi->dev, + "Failed to update buffer status: %d\n", ret); + goto phy_exit; + } + + init_waitqueue_head(&tc6->spi_wq); + + tc6->spi_thread = kthread_run(oa_tc6_spi_thread_handler, tc6, + "oa-tc6-spi-thread"); + if (IS_ERR(tc6->spi_thread)) { + dev_err(&tc6->spi->dev, "Failed to create SPI thread\n"); + goto phy_exit; + } + + sched_set_fifo(tc6->spi_thread); + + ret = devm_request_irq(&tc6->spi->dev, tc6->spi->irq, oa_tc6_macphy_isr, + IRQF_TRIGGER_FALLING, dev_name(&tc6->spi->dev), + tc6); + if (ret) { + dev_err(&tc6->spi->dev, "Failed to request macphy isr %d\n", + ret); + goto kthread_stop; + } + + /* The oa_tc6_sw_reset_macphy() function resets and clears the MAC-PHY + * reset complete status. The IRQ is also asserted on reset completion + * and remains asserted until the MAC-PHY receives a data chunk. So + * performing an empty data chunk transmission will deassert the IRQ. + * Refer to sections 7.7 and 9.2.8.8 in the OPEN Alliance specification + * for more details. + */ + tc6->int_flag = true; + wake_up_interruptible(&tc6->spi_wq); + + return tc6; + +kthread_stop: + kthread_stop(tc6->spi_thread); +phy_exit: + oa_tc6_phy_exit(tc6); + return NULL; +} +EXPORT_SYMBOL_GPL(oa_tc6_init); + +/** + * oa_tc6_exit - exit function. + * @tc6: oa_tc6 struct.
+ */ +void oa_tc6_exit(struct oa_tc6 *tc6) +{ + oa_tc6_phy_exit(tc6); + kthread_stop(tc6->spi_thread); + dev_kfree_skb_any(tc6->ongoing_tx_skb); + dev_kfree_skb_any(tc6->waiting_tx_skb); + dev_kfree_skb_any(tc6->rx_skb); +} +EXPORT_SYMBOL_GPL(oa_tc6_exit); + +MODULE_DESCRIPTION("OPEN Alliance 10BASE-T1x MAC-PHY Serial Interface Lib"); +MODULE_AUTHOR("Parthiban Veerasooran <parthiban.veerasooran@microchip.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 62ba269da902..cb4e12df7719 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1699,8 +1699,9 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(dev, &mac->napi, pasemi_mac_poll); - dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG | - NETIF_F_HIGHDMA | NETIF_F_GSO; + dev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | + NETIF_F_GSO; + dev->lltx = true; mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); if (!mac->dma_pdev) { diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index 3f7519e435b8..01fe76786f77 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -23,6 +23,7 @@ config IONIC depends on PTP_1588_CLOCK_OPTIONAL select NET_DEVLINK select DIMLIB + select PAGE_POOL help This enables the support for the Pensando family of Ethernet adapters. More specific information on this driver can be diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c index 59e5a9f21105..c98b4e75e288 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c @@ -123,7 +123,7 @@ void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) struct ionic_cq *cq = &qcq->cq; qcq_dentry = debugfs_create_dir(q->name, lif->dentry); - if (IS_ERR_OR_NULL(qcq_dentry)) + if (IS_ERR(qcq_dentry)) return; qcq->dentry = qcq_dentry; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index c647033f3ad2..c8c710cfe70c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -32,7 +32,7 @@ #define IONIC_ADMIN_DOORBELL_DEADLINE (HZ / 2) /* 500ms */ #define IONIC_TX_DOORBELL_DEADLINE (HZ / 100) /* 10ms */ #define IONIC_RX_MIN_DOORBELL_DEADLINE (HZ / 100) /* 10ms */ -#define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 5) /* 5s */ +#define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 4) /* 4s */ struct ionic_dev_bar { void __iomem *vaddr; @@ -181,10 +181,7 @@ struct ionic_queue; struct ionic_qcq; #define IONIC_MAX_BUF_LEN ((u16)-1) -#define IONIC_PAGE_SIZE PAGE_SIZE -#define IONIC_PAGE_SPLIT_SZ (PAGE_SIZE / 2) -#define IONIC_PAGE_GFP_MASK (GFP_ATOMIC | __GFP_NOWARN |\ - __GFP_COMP | __GFP_MEMALLOC) +#define IONIC_PAGE_SIZE MIN(PAGE_SIZE, IONIC_MAX_BUF_LEN) #define IONIC_XDP_MAX_LINEAR_MTU (IONIC_PAGE_SIZE - \ (VLAN_ETH_HLEN + \ @@ -238,9 +235,8 @@ struct ionic_queue { unsigned int index; unsigned int num_descs; unsigned int max_sg_elems; + u64 features; - unsigned int type; - unsigned int hw_index; unsigned int hw_type; bool xdp_flush; union { @@ -250,18 +246,23 @@ struct ionic_queue { struct ionic_admin_cmd *adminq; }; union { - void __iomem *cmb_base; - struct ionic_txq_desc __iomem *cmb_txq; - struct ionic_rxq_desc __iomem *cmb_rxq; - }; - union
{ void *sg_base; struct ionic_txq_sg_desc *txq_sgl; struct ionic_txq_sg_desc_v1 *txq_sgl_v1; struct ionic_rxq_sg_desc *rxq_sgl; }; struct xdp_rxq_info *xdp_rxq_info; + struct bpf_prog *xdp_prog; + struct page_pool *page_pool; struct ionic_queue *partner; + + union { + void __iomem *cmb_base; + struct ionic_txq_desc __iomem *cmb_txq; + struct ionic_rxq_desc __iomem *cmb_rxq; + }; + unsigned int type; + unsigned int hw_index; dma_addr_t base_pa; dma_addr_t cmb_base_pa; dma_addr_t sg_base_pa; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 4619fd74f3e3..dda22fa4448c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -989,8 +989,6 @@ static int ionic_get_ts_info(struct net_device *netdev, info->phc_index = ptp_clock_index(lif->phc->ptp); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index aa0cc31dfe6e..40496587b2b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -13,6 +13,7 @@ #include <linux/cpumask.h> #include <linux/crash_dump.h> #include <linux/vmalloc.h> +#include <net/page_pool/helpers.h> #include "ionic.h" #include "ionic_bus.h" @@ -46,8 +47,9 @@ static int ionic_start_queues(struct ionic_lif *lif); static void ionic_stop_queues(struct ionic_lif *lif); static void ionic_lif_queue_identify(struct ionic_lif *lif); -static int ionic_xdp_queues_config(struct ionic_lif *lif); -static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q); +static void ionic_xdp_rxqs_prog_update(struct ionic_lif *lif); +static void ionic_unregister_rxq_info(struct ionic_queue *q); +static int ionic_register_rxq_info(struct ionic_queue *q, unsigned int napi_id); static void ionic_dim_work(struct work_struct *work) { @@ -380,6 +382,7 @@ static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) if (!(qcq->flags & IONIC_QCQ_F_INITED)) return; + ionic_unregister_rxq_info(&qcq->q); if (qcq->flags & IONIC_QCQ_F_INTR) { ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_SET); @@ -437,9 +440,10 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) qcq->sg_base_pa = 0; } - ionic_xdp_unregister_rxq_info(&qcq->q); - ionic_qcq_intr_free(lif, qcq); + page_pool_destroy(qcq->q.page_pool); + qcq->q.page_pool = NULL; + ionic_qcq_intr_free(lif, qcq); vfree(qcq->q.info); qcq->q.info = NULL; } @@ -553,7 +557,8 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, unsigned int cq_desc_size, unsigned int sg_desc_size, unsigned int desc_info_size, - unsigned int pid, struct ionic_qcq **qcq) + unsigned int pid, struct bpf_prog *xdp_prog, + struct ionic_qcq **qcq) { struct ionic_dev *idev = &lif->ionic->idev; struct device *dev = lif->ionic->dev; @@ -579,6 +584,31 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, goto err_out_free_qcq; } + if (type == IONIC_QTYPE_RXQ) { + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = num_descs, + .nid = NUMA_NO_NODE, + .dev = lif->ionic->dev, + .napi = &new->napi, + .dma_dir = DMA_FROM_DEVICE, + .max_len = PAGE_SIZE, + .netdev = lif->netdev, + 
}; + + if (xdp_prog) + pp_params.dma_dir = DMA_BIDIRECTIONAL; + + new->q.page_pool = page_pool_create(&pp_params); + if (IS_ERR(new->q.page_pool)) { + netdev_err(lif->netdev, "Cannot create page_pool\n"); + err = PTR_ERR(new->q.page_pool); + new->q.page_pool = NULL; + goto err_out_free_q_info; + } + } + new->q.type = type; new->q.max_sg_elems = lif->qtype_info[type].max_sg_elems; @@ -586,12 +616,12 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, desc_size, sg_desc_size, pid); if (err) { netdev_err(lif->netdev, "Cannot initialize queue\n"); - goto err_out_free_q_info; + goto err_out_free_page_pool; } err = ionic_alloc_qcq_interrupt(lif, new); if (err) - goto err_out_free_q_info; + goto err_out_free_page_pool; err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size); if (err) { @@ -712,6 +742,8 @@ err_out_free_irq: devm_free_irq(dev, new->intr.vector, &new->napi); ionic_intr_free(lif->ionic, new->intr.index); } +err_out_free_page_pool: + page_pool_destroy(new->q.page_pool); err_out_free_q_info: vfree(new->q.info); err_out_free_qcq: @@ -734,7 +766,7 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif) sizeof(struct ionic_admin_comp), 0, sizeof(struct ionic_admin_desc_info), - lif->kern_pid, &lif->adminqcq); + lif->kern_pid, NULL, &lif->adminqcq); if (err) return err; ionic_debugfs_add_qcq(lif, lif->adminqcq); @@ -747,7 +779,7 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif) sizeof(union ionic_notifyq_comp), 0, sizeof(struct ionic_admin_desc_info), - lif->kern_pid, &lif->notifyqcq); + lif->kern_pid, NULL, &lif->notifyqcq); if (err) goto err_out; ionic_debugfs_add_qcq(lif, lif->notifyqcq); @@ -925,6 +957,11 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) netif_napi_add(lif->netdev, &qcq->napi, ionic_rx_napi); else netif_napi_add(lif->netdev, &qcq->napi, ionic_txrx_napi); + err = ionic_register_rxq_info(q, qcq->napi.napi_id); + if (err) { + netif_napi_del(&qcq->napi); + return err; + } qcq->flags |= IONIC_QCQ_F_INITED; @@ -960,7 +997,7 @@ int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, txq_i, "hwstamp_tx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &txq); + lif->kern_pid, NULL, &txq); if (err) goto err_qcq_alloc; @@ -1020,7 +1057,7 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, rxq_i, "hwstamp_rx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &rxq); + lif->kern_pid, NULL, &rxq); if (err) goto err_qcq_alloc; @@ -1037,7 +1074,7 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif) goto err_qcq_init; if (test_bit(IONIC_LIF_F_UP, lif->state)) { - ionic_rx_fill(&rxq->q); + ionic_rx_fill(&rxq->q, NULL); err = ionic_qcq_enable(rxq); if (err) goto err_qcq_enable; @@ -2046,7 +2083,7 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &lif->txqcqs[i]); + lif->kern_pid, NULL, &lif->txqcqs[i]); if (err) goto err_out; @@ -2078,7 +2115,8 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &lif->rxqcqs[i]); + lif->kern_pid, lif->xdp_prog, + &lif->rxqcqs[i]); if (err) goto err_out; @@ -2143,9 +2181,7 @@ static int 
ionic_txrx_enable(struct ionic_lif *lif) int derr = 0; int i, err; - err = ionic_xdp_queues_config(lif); - if (err) - return err; + ionic_xdp_rxqs_prog_update(lif); for (i = 0; i < lif->nxqs; i++) { if (!(lif->rxqcqs[i] && lif->txqcqs[i])) { @@ -2154,7 +2190,8 @@ static int ionic_txrx_enable(struct ionic_lif *lif) goto err_out; } - ionic_rx_fill(&lif->rxqcqs[i]->q); + ionic_rx_fill(&lif->rxqcqs[i]->q, + READ_ONCE(lif->rxqcqs[i]->q.xdp_prog)); err = ionic_qcq_enable(lif->rxqcqs[i]); if (err) goto err_out; @@ -2167,7 +2204,7 @@ static int ionic_txrx_enable(struct ionic_lif *lif) } if (lif->hwstamp_rxq) { - ionic_rx_fill(&lif->hwstamp_rxq->q); + ionic_rx_fill(&lif->hwstamp_rxq->q, NULL); err = ionic_qcq_enable(lif->hwstamp_rxq); if (err) goto err_out_hwstamp_rx; @@ -2192,7 +2229,7 @@ err_out: derr = ionic_qcq_disable(lif, lif->rxqcqs[i], derr); } - ionic_xdp_queues_config(lif); + ionic_xdp_rxqs_prog_update(lif); return err; } @@ -2651,7 +2688,7 @@ static void ionic_vf_attr_replay(struct ionic_lif *lif) ionic_vf_start(ionic); } -static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q) +static void ionic_unregister_rxq_info(struct ionic_queue *q) { struct xdp_rxq_info *xi; @@ -2665,7 +2702,7 @@ static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q) kfree(xi); } -static int ionic_xdp_register_rxq_info(struct ionic_queue *q, unsigned int napi_id) +static int ionic_register_rxq_info(struct ionic_queue *q, unsigned int napi_id) { struct xdp_rxq_info *rxq_info; int err; @@ -2676,15 +2713,15 @@ static int ionic_xdp_register_rxq_info(struct ionic_queue *q, unsigned int napi_ err = xdp_rxq_info_reg(rxq_info, q->lif->netdev, q->index, napi_id); if (err) { - dev_err(q->dev, "Queue %d xdp_rxq_info_reg failed, err %d\n", - q->index, err); + netdev_err(q->lif->netdev, "q%d xdp_rxq_info_reg failed, err %d\n", + q->index, err); goto err_out; } - err = xdp_rxq_info_reg_mem_model(rxq_info, MEM_TYPE_PAGE_ORDER0, NULL); + err = xdp_rxq_info_reg_mem_model(rxq_info, MEM_TYPE_PAGE_POOL, q->page_pool); if (err) { - dev_err(q->dev, "Queue %d xdp_rxq_info_reg_mem_model failed, err %d\n", - q->index, err); + netdev_err(q->lif->netdev, "q%d xdp_rxq_info_reg_mem_model failed, err %d\n", + q->index, err); xdp_rxq_info_unreg(rxq_info); goto err_out; } @@ -2698,44 +2735,20 @@ err_out: return err; } -static int ionic_xdp_queues_config(struct ionic_lif *lif) +static void ionic_xdp_rxqs_prog_update(struct ionic_lif *lif) { + struct bpf_prog *xdp_prog; unsigned int i; - int err; if (!lif->rxqcqs) - return 0; - - /* There's no need to rework memory if not going to/from NULL program. - * If there is no lif->xdp_prog, there should also be no q.xdp_rxq_info - * This way we don't need to keep an *xdp_prog in every queue struct. 
- */ - if (!lif->xdp_prog == !lif->rxqcqs[0]->q.xdp_rxq_info) - return 0; + return; + xdp_prog = READ_ONCE(lif->xdp_prog); for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++) { struct ionic_queue *q = &lif->rxqcqs[i]->q; - if (q->xdp_rxq_info) { - ionic_xdp_unregister_rxq_info(q); - continue; - } - - err = ionic_xdp_register_rxq_info(q, lif->rxqcqs[i]->napi.napi_id); - if (err) { - dev_err(lif->ionic->dev, "failed to register RX queue %d info for XDP, err %d\n", - i, err); - goto err_out; - } + WRITE_ONCE(q->xdp_prog, xdp_prog); } - - return 0; - -err_out: - for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++) - ionic_xdp_unregister_rxq_info(&lif->rxqcqs[i]->q); - - return err; } static int ionic_xdp_config(struct net_device *netdev, struct netdev_bpf *bpf) @@ -2765,11 +2778,17 @@ static int ionic_xdp_config(struct net_device *netdev, struct netdev_bpf *bpf) if (!netif_running(netdev)) { old_prog = xchg(&lif->xdp_prog, bpf->prog); + } else if (lif->xdp_prog && bpf->prog) { + old_prog = xchg(&lif->xdp_prog, bpf->prog); + ionic_xdp_rxqs_prog_update(lif); } else { + struct ionic_queue_params qparams; + + ionic_init_queue_params(lif, &qparams); + qparams.xdp_prog = bpf->prog; mutex_lock(&lif->queue_lock); - ionic_stop_queues_reconfig(lif); + ionic_reconfigure_queues(lif, &qparams); old_prog = xchg(&lif->xdp_prog, bpf->prog); - ionic_start_queues_reconfig(lif); mutex_unlock(&lif->queue_lock); } @@ -2871,13 +2890,23 @@ err_out: static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b) { - /* only swapping the queues, not the napi, flags, or other stuff */ + /* only swapping the queues and napi, not flags or other stuff */ + swap(a->napi, b->napi); + + if (a->q.type == IONIC_QTYPE_RXQ) { + swap(a->q.page_pool, b->q.page_pool); + a->q.page_pool->p.napi = &a->napi; + if (b->q.page_pool) /* is NULL when increasing queue count */ + b->q.page_pool->p.napi = &b->napi; + } + swap(a->q.features, b->q.features); swap(a->q.num_descs, b->q.num_descs); swap(a->q.desc_size, b->q.desc_size); swap(a->q.base, b->q.base); swap(a->q.base_pa, b->q.base_pa); swap(a->q.info, b->q.info); + swap(a->q.xdp_prog, b->q.xdp_prog); swap(a->q.xdp_rxq_info, b->q.xdp_rxq_info); swap(a->q.partner, b->q.partner); swap(a->q_base, b->q_base); @@ -2928,7 +2957,8 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, } if (qparam->nxqs != lif->nxqs || qparam->nrxq_descs != lif->nrxq_descs || - qparam->rxq_features != lif->rxq_features) { + qparam->rxq_features != lif->rxq_features || + qparam->xdp_prog != lif->xdp_prog) { rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif, sizeof(struct ionic_qcq *), GFP_KERNEL); if (!rx_qcqs) { @@ -2959,7 +2989,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, 4, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &lif->txqcqs[i]); + lif->kern_pid, NULL, &lif->txqcqs[i]); if (err) goto err_out; } @@ -2968,7 +2998,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &tx_qcqs[i]); + lif->kern_pid, NULL, &tx_qcqs[i]); if (err) goto err_out; } @@ -2990,7 +3020,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, 4, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &lif->rxqcqs[i]); + lif->kern_pid, NULL, 
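The removed comment described the old scheme, where q->xdp_rxq_info doubled as the "is XDP on" flag. The rework above instead caches the program pointer in every Rx queue and merely republishes it on change. A condensed sketch of that hand-off, with illustrative type names (my_lif/my_rxq):

#include <linux/bpf.h>

struct my_rxq {
	struct bpf_prog *xdp_prog;	/* per-queue cached copy */
};

struct my_lif {
	struct bpf_prog *xdp_prog;	/* owner; swapped with xchg() */
	struct my_rxq *rxq;
	unsigned int nrxqs;
};

static void my_xdp_rxqs_prog_update(struct my_lif *lif)
{
	struct bpf_prog *prog = READ_ONCE(lif->xdp_prog);
	unsigned int i;

	for (i = 0; i < lif->nrxqs; i++)
		WRITE_ONCE(lif->rxq[i].xdp_prog, prog);
}

The payoff shows in ionic_xdp_config() above: a prog-to-prog swap is just xchg() plus this republish, and only NULL-to-prog (or back) transitions pay for a full ionic_reconfigure_queues() pass, which is needed because the page_pool DMA direction differs once XDP is attached.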
&lif->rxqcqs[i]); if (err) goto err_out; } @@ -2999,11 +3029,12 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &rx_qcqs[i]); + lif->kern_pid, qparam->xdp_prog, &rx_qcqs[i]); if (err) goto err_out; rx_qcqs[i]->q.features = qparam->rxq_features; + rx_qcqs[i]->q.xdp_prog = qparam->xdp_prog; } } @@ -3220,7 +3251,7 @@ int ionic_lif_alloc(struct ionic *ionic) netdev->netdev_ops = &ionic_netdev_ops; ionic_ethtool_set_ops(netdev); - netdev->watchdog_timeo = 2 * HZ; + netdev->watchdog_timeo = 5 * HZ; netif_carrier_off(netdev); lif->identity = lid; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 3e1005293c4a..e01756fb7fdd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -268,6 +268,7 @@ struct ionic_queue_params { unsigned int ntxq_descs; unsigned int nrxq_descs; u64 rxq_features; + struct bpf_prog *xdp_prog; bool intr_split; bool cmb_tx; bool cmb_rx; @@ -280,6 +281,7 @@ static inline void ionic_init_queue_params(struct ionic_lif *lif, qparam->ntxq_descs = lif->ntxq_descs; qparam->nrxq_descs = lif->nrxq_descs; qparam->rxq_features = lif->rxq_features; + qparam->xdp_prog = lif->xdp_prog; qparam->intr_split = test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); qparam->cmb_tx = test_bit(IONIC_LIF_F_CMB_TX_RINGS, lif->state); qparam->cmb_rx = test_bit(IONIC_LIF_F_CMB_RX_RINGS, lif->state); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 1ee2f285cb42..528114877677 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -312,8 +312,8 @@ static int ionic_lif_filter_add(struct ionic_lif *lif, int err = 0; ctx.cmd.rx_filter_add = *ac; - ctx.cmd.rx_filter_add.opcode = IONIC_CMD_RX_FILTER_ADD, - ctx.cmd.rx_filter_add.lif_index = cpu_to_le16(lif->index), + ctx.cmd.rx_filter_add.opcode = IONIC_CMD_RX_FILTER_ADD; + ctx.cmd.rx_filter_add.lif_index = cpu_to_le16(lif->index); spin_lock_bh(&lif->rx_filters.lock); f = ionic_rx_filter_find(lif, &ctx.cmd.rx_filter_add); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index fc79baad4561..0eeda7e502db 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -6,6 +6,7 @@ #include <linux/if_vlan.h> #include <net/ip6_checksum.h> #include <net/netdev_queues.h> +#include <net/page_pool/helpers.h> #include "ionic.h" #include "ionic_lif.h" @@ -118,108 +119,57 @@ static void *ionic_rx_buf_va(struct ionic_buf_info *buf_info) static dma_addr_t ionic_rx_buf_pa(struct ionic_buf_info *buf_info) { - return buf_info->dma_addr + buf_info->page_offset; + return page_pool_get_dma_addr(buf_info->page) + buf_info->page_offset; } -static unsigned int ionic_rx_buf_size(struct ionic_buf_info *buf_info) +static void __ionic_rx_put_buf(struct ionic_queue *q, + struct ionic_buf_info *buf_info, + bool recycle_direct) { - return min_t(u32, IONIC_MAX_BUF_LEN, IONIC_PAGE_SIZE - buf_info->page_offset); -} - -static int ionic_rx_page_alloc(struct ionic_queue *q, - struct ionic_buf_info *buf_info) -{ - struct device *dev = q->dev; - dma_addr_t dma_addr; - struct page *page; - - page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); - if 
(unlikely(!page)) { - net_err_ratelimited("%s: %s page alloc failed\n", - dev_name(dev), q->name); - q_to_rx_stats(q)->alloc_err++; - return -ENOMEM; - } - - dma_addr = dma_map_page(dev, page, 0, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, dma_addr))) { - __free_pages(page, 0); - net_err_ratelimited("%s: %s dma map failed\n", - dev_name(dev), q->name); - q_to_rx_stats(q)->dma_map_err++; - return -EIO; - } - - buf_info->dma_addr = dma_addr; - buf_info->page = page; - buf_info->page_offset = 0; - - return 0; -} - -static void ionic_rx_page_free(struct ionic_queue *q, - struct ionic_buf_info *buf_info) -{ - struct device *dev = q->dev; - - if (unlikely(!buf_info)) { - net_err_ratelimited("%s: %s invalid buf_info in free\n", - dev_name(dev), q->name); - return; - } - if (!buf_info->page) return; - dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - __free_pages(buf_info->page, 0); + page_pool_put_full_page(q->page_pool, buf_info->page, recycle_direct); buf_info->page = NULL; + buf_info->len = 0; + buf_info->page_offset = 0; } -static bool ionic_rx_buf_recycle(struct ionic_queue *q, - struct ionic_buf_info *buf_info, u32 len) -{ - u32 size; - - /* don't re-use pages allocated in low-mem condition */ - if (page_is_pfmemalloc(buf_info->page)) - return false; - - /* don't re-use buffers from non-local numa nodes */ - if (page_to_nid(buf_info->page) != numa_mem_id()) - return false; - - size = ALIGN(len, q->xdp_rxq_info ? IONIC_PAGE_SIZE : IONIC_PAGE_SPLIT_SZ); - buf_info->page_offset += size; - if (buf_info->page_offset >= IONIC_PAGE_SIZE) - return false; - get_page(buf_info->page); +static void ionic_rx_put_buf(struct ionic_queue *q, + struct ionic_buf_info *buf_info) +{ + __ionic_rx_put_buf(q, buf_info, false); +} - return true; +static void ionic_rx_put_buf_direct(struct ionic_queue *q, + struct ionic_buf_info *buf_info) +{ + __ionic_rx_put_buf(q, buf_info, true); } static void ionic_rx_add_skb_frag(struct ionic_queue *q, struct sk_buff *skb, struct ionic_buf_info *buf_info, - u32 off, u32 len, + u32 headroom, u32 len, bool synced) { if (!synced) - dma_sync_single_range_for_cpu(q->dev, ionic_rx_buf_pa(buf_info), - off, len, DMA_FROM_DEVICE); + page_pool_dma_sync_for_cpu(q->page_pool, + buf_info->page, + buf_info->page_offset + headroom, + len); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - buf_info->page, buf_info->page_offset + off, - len, - IONIC_PAGE_SIZE); + buf_info->page, buf_info->page_offset + headroom, + len, buf_info->len); - if (!ionic_rx_buf_recycle(q, buf_info, len)) { - dma_unmap_page(q->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - buf_info->page = NULL; - } + /* napi_gro_frags() will release/recycle the + * page_pool buffers from the frags list + */ + buf_info->page = NULL; + buf_info->len = 0; + buf_info->page_offset = 0; } static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q, @@ -244,12 +194,13 @@ static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q, q_to_rx_stats(q)->alloc_err++; return NULL; } + skb_mark_for_recycle(skb); if (headroom) frag_len = min_t(u16, len, IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN); else - frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info)); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE); if (unlikely(!buf_info->page)) goto err_bad_buf_page; @@ -260,7 +211,7 @@ static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q, for (i = 0; i < num_sg_elems; i++, buf_info++) { if (unlikely(!buf_info->page)) goto err_bad_buf_page; - frag_len = min_t(u16, 
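With the pool owning the DMA mapping, ionic_rx_add_skb_frag() above shrinks to a sync-for-cpu plus skb_add_rx_frag(). Condensed, with a hypothetical helper name:

static void my_add_pp_frag(struct page_pool *pool, struct sk_buff *skb,
			   struct page *page, u32 offset, u32 len,
			   u32 truesize)
{
	/* the CPU is about to read what the device just wrote */
	page_pool_dma_sync_for_cpu(pool, page, offset, len);

	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
			page, offset, len, truesize);
}

Because the skb head is tagged with skb_mark_for_recycle(), as ionic_rx_build_skb() above and ionic_rx_copybreak() below now do, the stack returns these pages to the pool when the skb is consumed; that is why the driver can simply forget buf_info->page after attaching instead of unmapping it.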
len, ionic_rx_buf_size(buf_info)); + frag_len = min_t(u16, len, buf_info->len); ionic_rx_add_skb_frag(q, skb, buf_info, 0, frag_len, synced); len -= frag_len; } @@ -277,11 +228,13 @@ static struct sk_buff *ionic_rx_copybreak(struct net_device *netdev, struct ionic_rx_desc_info *desc_info, unsigned int headroom, unsigned int len, + unsigned int num_sg_elems, bool synced) { struct ionic_buf_info *buf_info; struct device *dev = q->dev; struct sk_buff *skb; + int i; buf_info = &desc_info->bufs[0]; @@ -292,54 +245,52 @@ static struct sk_buff *ionic_rx_copybreak(struct net_device *netdev, q_to_rx_stats(q)->alloc_err++; return NULL; } - - if (unlikely(!buf_info->page)) { - dev_kfree_skb(skb); - return NULL; - } + skb_mark_for_recycle(skb); if (!synced) - dma_sync_single_range_for_cpu(dev, ionic_rx_buf_pa(buf_info), - headroom, len, DMA_FROM_DEVICE); + page_pool_dma_sync_for_cpu(q->page_pool, + buf_info->page, + buf_info->page_offset + headroom, + len); + skb_copy_to_linear_data(skb, ionic_rx_buf_va(buf_info) + headroom, len); - dma_sync_single_range_for_device(dev, ionic_rx_buf_pa(buf_info), - headroom, len, DMA_FROM_DEVICE); skb_put(skb, len); skb->protocol = eth_type_trans(skb, netdev); + /* recycle the Rx buffer now that we're done with it */ + ionic_rx_put_buf_direct(q, buf_info); + buf_info++; + for (i = 0; i < num_sg_elems; i++, buf_info++) + ionic_rx_put_buf_direct(q, buf_info); + return skb; } static void ionic_xdp_tx_desc_clean(struct ionic_queue *q, - struct ionic_tx_desc_info *desc_info) + struct ionic_tx_desc_info *desc_info, + bool in_napi) { - unsigned int nbufs = desc_info->nbufs; - struct ionic_buf_info *buf_info; - struct device *dev = q->dev; - int i; + struct xdp_frame_bulk bq; - if (!nbufs) + if (!desc_info->nbufs) return; - buf_info = desc_info->bufs; - dma_unmap_single(dev, buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - if (desc_info->act == XDP_TX) - __free_pages(buf_info->page, 0); - buf_info->page = NULL; + xdp_frame_bulk_init(&bq); + rcu_read_lock(); /* need for xdp_return_frame_bulk */ - buf_info++; - for (i = 1; i < nbufs + 1 && buf_info->page; i++, buf_info++) { - dma_unmap_page(dev, buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - if (desc_info->act == XDP_TX) - __free_pages(buf_info->page, 0); - buf_info->page = NULL; + if (desc_info->act == XDP_TX) { + if (likely(in_napi)) + xdp_return_frame_rx_napi(desc_info->xdpf); + else + xdp_return_frame(desc_info->xdpf); + } else if (desc_info->act == XDP_REDIRECT) { + ionic_tx_desc_unmap_bufs(q, desc_info); + xdp_return_frame_bulk(desc_info->xdpf, &bq); } - if (desc_info->act == XDP_REDIRECT) - xdp_return_frame(desc_info->xdpf); + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); desc_info->nbufs = 0; desc_info->xdpf = NULL; @@ -363,9 +314,17 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, buf_info = desc_info->bufs; stats = q_to_tx_stats(q); - dma_addr = ionic_tx_map_single(q, frame->data, len); - if (!dma_addr) - return -EIO; + if (act == XDP_TX) { + dma_addr = page_pool_get_dma_addr(page) + + off + XDP_PACKET_HEADROOM; + dma_sync_single_for_device(q->dev, dma_addr, + len, DMA_TO_DEVICE); + } else /* XDP_REDIRECT */ { + dma_addr = ionic_tx_map_single(q, frame->data, len); + if (!dma_addr) + return -EIO; + } + buf_info->dma_addr = dma_addr; buf_info->len = len; buf_info->page = page; @@ -387,10 +346,21 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, frag = sinfo->frags; elem = ionic_tx_sg_elems(q); for (i = 0; i < sinfo->nr_frags; i++, frag++, 
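ionic_xdp_tx_desc_clean() above now releases transmitted XDP frames through the generic xdp_frame helpers instead of hand-unmapping and freeing pages. The essential shape, assuming act and xdpf are taken from the Tx descriptor info (outside NAPI the plain xdp_return_frame() variant is used instead):

struct xdp_frame_bulk bq;

xdp_frame_bulk_init(&bq);
rcu_read_lock();	/* required around xdp_return_frame_bulk() */

if (act == XDP_TX)
	/* our own page_pool page: fast direct recycle in softirq */
	xdp_return_frame_rx_napi(xdpf);
else if (act == XDP_REDIRECT)
	/* foreign frame: batch the returns */
	xdp_return_frame_bulk(xdpf, &bq);

xdp_flush_frame_bulk(&bq);
rcu_read_unlock();

Note the asymmetry: XDP_TX buffers were never mapped by this path (the pool mapping is reused), so only the XDP_REDIRECT branch still calls ionic_tx_desc_unmap_bufs().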
bi++) { - dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (!dma_addr) { - ionic_tx_desc_unmap_bufs(q, desc_info); - return -EIO; + if (act == XDP_TX) { + struct page *pg = skb_frag_page(frag); + + dma_addr = page_pool_get_dma_addr(pg) + + skb_frag_off(frag); + dma_sync_single_for_device(q->dev, dma_addr, + skb_frag_size(frag), + DMA_TO_DEVICE); + } else { + dma_addr = ionic_tx_map_frag(q, frag, 0, + skb_frag_size(frag)); + if (dma_mapping_error(q->dev, dma_addr)) { + ionic_tx_desc_unmap_bufs(q, desc_info); + return -EIO; + } } bi->dma_addr = dma_addr; bi->len = skb_frag_size(frag); @@ -481,15 +451,13 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, return nxmit; } -static void ionic_xdp_rx_put_bufs(struct ionic_queue *q, - struct ionic_buf_info *buf_info, - int nbufs) +static void ionic_xdp_rx_unlink_bufs(struct ionic_queue *q, + struct ionic_buf_info *buf_info, + int nbufs) { int i; for (i = 0; i < nbufs; i++) { - dma_unmap_page(q->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); buf_info->page = NULL; buf_info++; } @@ -516,11 +484,9 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, frag_len = min_t(u16, len, IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN); xdp_prepare_buff(&xdp_buf, ionic_rx_buf_va(buf_info), XDP_PACKET_HEADROOM, frag_len, false); - - dma_sync_single_range_for_cpu(rxq->dev, ionic_rx_buf_pa(buf_info), - XDP_PACKET_HEADROOM, frag_len, - DMA_FROM_DEVICE); - + page_pool_dma_sync_for_cpu(rxq->page_pool, buf_info->page, + buf_info->page_offset + XDP_PACKET_HEADROOM, + frag_len); prefetchw(&xdp_buf.data_hard_start); /* We limit MTU size to one buffer if !xdp_has_frags, so @@ -542,15 +508,16 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, do { if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) { err = -ENOSPC; - goto out_xdp_abort; + break; } frag = &sinfo->frags[sinfo->nr_frags]; sinfo->nr_frags++; bi++; - frag_len = min_t(u16, remain_len, ionic_rx_buf_size(bi)); - dma_sync_single_range_for_cpu(rxq->dev, ionic_rx_buf_pa(bi), - 0, frag_len, DMA_FROM_DEVICE); + frag_len = min_t(u16, remain_len, bi->len); + page_pool_dma_sync_for_cpu(rxq->page_pool, bi->page, + buf_info->page_offset, + frag_len); skb_frag_fill_page_desc(frag, bi->page, 0, frag_len); sinfo->xdp_frags_size += frag_len; remain_len -= frag_len; @@ -569,14 +536,16 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, return false; /* false = we didn't consume the packet */ case XDP_DROP: - ionic_rx_page_free(rxq, buf_info); + ionic_rx_put_buf_direct(rxq, buf_info); stats->xdp_drop++; break; case XDP_TX: xdpf = xdp_convert_buff_to_frame(&xdp_buf); - if (!xdpf) - goto out_xdp_abort; + if (!xdpf) { + err = -ENOSPC; + break; + } txq = rxq->partner; nq = netdev_get_tx_queue(netdev, txq->index); @@ -588,7 +557,8 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, ionic_q_space_avail(txq), 1, 1)) { __netif_tx_unlock(nq); - goto out_xdp_abort; + err = -EIO; + break; } err = ionic_xdp_post_frame(txq, xdpf, XDP_TX, @@ -598,49 +568,47 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, __netif_tx_unlock(nq); if (unlikely(err)) { netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err); - goto out_xdp_abort; + break; } - ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); + ionic_xdp_rx_unlink_bufs(rxq, buf_info, nbufs); stats->xdp_tx++; - - /* the Tx completion will free the buffers */ break; case XDP_REDIRECT: err = xdp_do_redirect(netdev, &xdp_buf, xdp_prog); if (unlikely(err)) { netdev_dbg(netdev, "xdp_do_redirect err %d\n", err); - goto out_xdp_abort; + break; 
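ionic_xdp_post_frame() above splits the DMA handling by verdict: XDP_TX buffers come from the queue's own page_pool and are already mapped, so they only need a device-direction sync, while XDP_REDIRECT frames were allocated by some other driver and must be mapped here. A sketch (my_xdp_tx_dma() is illustrative; 0 doubles as the error value, matching how ionic_tx_map_single() is checked):

static dma_addr_t my_xdp_tx_dma(struct device *dev, struct page *page,
				u32 off, u32 len, bool own_pool_page)
{
	dma_addr_t dma;

	if (own_pool_page) {		/* XDP_TX: reuse the pool mapping */
		dma = page_pool_get_dma_addr(page) + off;
		dma_sync_single_for_device(dev, dma, len, DMA_TO_DEVICE);
		return dma;
	}

	/* XDP_REDIRECT: foreign memory, map it for this transmit */
	dma = dma_map_single(dev, page_address(page) + off, len,
			     DMA_TO_DEVICE);
	return dma_mapping_error(dev, dma) ? 0 : dma;
}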
} - ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); + ionic_xdp_rx_unlink_bufs(rxq, buf_info, nbufs); rxq->xdp_flush = true; stats->xdp_redirect++; break; case XDP_ABORTED: default: - goto out_xdp_abort; + err = -EIO; + break; } - return true; - -out_xdp_abort: - trace_xdp_exception(netdev, xdp_prog, xdp_action); - ionic_rx_page_free(rxq, buf_info); - stats->xdp_aborted++; + if (err) { + ionic_rx_put_buf_direct(rxq, buf_info); + trace_xdp_exception(netdev, xdp_prog, xdp_action); + stats->xdp_aborted++; + } return true; } static void ionic_rx_clean(struct ionic_queue *q, struct ionic_rx_desc_info *desc_info, - struct ionic_rxq_comp *comp) + struct ionic_rxq_comp *comp, + struct bpf_prog *xdp_prog) { struct net_device *netdev = q->lif->netdev; struct ionic_qcq *qcq = q_to_qcq(q); struct ionic_rx_stats *stats; - struct bpf_prog *xdp_prog; - unsigned int headroom; + unsigned int headroom = 0; struct sk_buff *skb; bool synced = false; bool use_copybreak; @@ -648,7 +616,14 @@ static void ionic_rx_clean(struct ionic_queue *q, stats = q_to_rx_stats(q); - if (comp->status) { + if (unlikely(comp->status)) { + /* Most likely status==2 and the pkt received was bigger + * than the buffer available: comp->len will show the + * pkt size received that didn't fit the advertised desc.len + */ + dev_dbg(q->dev, "q%d drop comp->status %d comp->len %d desc->len %d\n", + q->index, comp->status, comp->len, q->rxq[q->head_idx].len); + stats->dropped++; return; } @@ -657,18 +632,18 @@ static void ionic_rx_clean(struct ionic_queue *q, stats->pkts++; stats->bytes += len; - xdp_prog = READ_ONCE(q->lif->xdp_prog); if (xdp_prog) { if (ionic_run_xdp(stats, netdev, xdp_prog, q, desc_info->bufs, len)) return; synced = true; + headroom = XDP_PACKET_HEADROOM; } - headroom = q->xdp_rxq_info ? 
XDP_PACKET_HEADROOM : 0; use_copybreak = len <= q->lif->rx_copybreak; if (use_copybreak) skb = ionic_rx_copybreak(netdev, q, desc_info, - headroom, len, synced); + headroom, len, + comp->num_sg_elems, synced); else skb = ionic_rx_build_skb(q, desc_info, headroom, len, comp->num_sg_elems, synced); @@ -744,7 +719,7 @@ static void ionic_rx_clean(struct ionic_queue *q, napi_gro_frags(&qcq->napi); } -bool ionic_rx_service(struct ionic_cq *cq) +static bool __ionic_rx_service(struct ionic_cq *cq, struct bpf_prog *xdp_prog) { struct ionic_rx_desc_info *desc_info; struct ionic_queue *q = cq->bound_q; @@ -766,11 +741,16 @@ bool ionic_rx_service(struct ionic_cq *cq) q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); /* clean the related q entry, only one per qc completion */ - ionic_rx_clean(q, desc_info, comp); + ionic_rx_clean(q, desc_info, comp, xdp_prog); return true; } +bool ionic_rx_service(struct ionic_cq *cq) +{ + return __ionic_rx_service(cq, NULL); +} + static inline void ionic_write_cmb_desc(struct ionic_queue *q, void *desc) { @@ -781,7 +761,7 @@ static inline void ionic_write_cmb_desc(struct ionic_queue *q, memcpy_toio(&q->cmb_txq[q->head_idx], desc, sizeof(q->cmb_txq[0])); } -void ionic_rx_fill(struct ionic_queue *q) +void ionic_rx_fill(struct ionic_queue *q, struct bpf_prog *xdp_prog) { struct net_device *netdev = q->lif->netdev; struct ionic_rx_desc_info *desc_info; @@ -789,6 +769,9 @@ void ionic_rx_fill(struct ionic_queue *q) struct ionic_buf_info *buf_info; unsigned int fill_threshold; struct ionic_rxq_desc *desc; + unsigned int first_frag_len; + unsigned int first_buf_len; + unsigned int headroom = 0; unsigned int remain_len; unsigned int frag_len; unsigned int nfrags; @@ -806,35 +789,43 @@ void ionic_rx_fill(struct ionic_queue *q) len = netdev->mtu + VLAN_ETH_HLEN; - for (i = n_fill; i; i--) { - unsigned int headroom; - unsigned int buf_len; + if (xdp_prog) { + /* Always alloc the full size buffer, but only need + * the actual frag_len in the descriptor + * XDP uses space in the first buffer, so account for + * head room, tail room, and ip header in the first frag size. + */ + headroom = XDP_PACKET_HEADROOM; + first_buf_len = IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN + headroom; + first_frag_len = min_t(u16, len + headroom, first_buf_len); + } else { + /* Use MTU size if smaller than max buffer size */ + first_frag_len = min_t(u16, len, IONIC_PAGE_SIZE); + first_buf_len = first_frag_len; + } + for (i = n_fill; i; i--) { + /* fill main descriptor - buf[0] */ nfrags = 0; remain_len = len; desc = &q->rxq[q->head_idx]; desc_info = &q->rx_info[q->head_idx]; buf_info = &desc_info->bufs[0]; - if (!buf_info->page) { /* alloc a new buffer? */ - if (unlikely(ionic_rx_page_alloc(q, buf_info))) { - desc->addr = 0; - desc->len = 0; - return; - } + buf_info->len = first_buf_len; + frag_len = first_frag_len - headroom; + + /* get a new buffer if we can't reuse one */ + if (!buf_info->page) + buf_info->page = page_pool_alloc(q->page_pool, + &buf_info->page_offset, + &buf_info->len, + GFP_ATOMIC); + if (unlikely(!buf_info->page)) { + buf_info->len = 0; + return; } - /* fill main descriptor - buf[0] - * XDP uses space in the first buffer, so account for - * head room, tail room, and ip header in the first frag size. - */ - headroom = q->xdp_rxq_info ? 
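The refill path above no longer allocates or maps pages itself: page_pool_alloc() hands back a page together with the offset and usable length of the fragment it carved out, and the descriptor records the pool's DMA address. For one descriptor, roughly (names follow the surrounding code; error handling elided):

unsigned int offset;
unsigned int size = buf_len;	/* in: wanted size, out: usable size */
struct page *page;

page = page_pool_alloc(q->page_pool, &offset, &size, GFP_ATOMIC);
if (unlikely(!page))
	return;			/* ring runs short; retried next NAPI poll */

desc->addr = cpu_to_le64(page_pool_get_dma_addr(page) + offset + headroom);
desc->len = cpu_to_le16(frag_len);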
XDP_PACKET_HEADROOM : 0; - if (q->xdp_rxq_info) - buf_len = IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN; - else - buf_len = ionic_rx_buf_size(buf_info); - frag_len = min_t(u16, len, buf_len); - desc->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info) + headroom); desc->len = cpu_to_le16(frag_len); remain_len -= frag_len; @@ -844,16 +835,26 @@ void ionic_rx_fill(struct ionic_queue *q) /* fill sg descriptors - buf[1..n] */ sg_elem = q->rxq_sgl[q->head_idx].elems; for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++, sg_elem++) { - if (!buf_info->page) { /* alloc a new sg buffer? */ - if (unlikely(ionic_rx_page_alloc(q, buf_info))) { - sg_elem->addr = 0; - sg_elem->len = 0; + frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE); + + /* Recycle any leftover buffers that are too small to reuse */ + if (unlikely(buf_info->page && buf_info->len < frag_len)) + ionic_rx_put_buf_direct(q, buf_info); + + /* Get new buffer if needed */ + if (!buf_info->page) { + buf_info->len = frag_len; + buf_info->page = page_pool_alloc(q->page_pool, + &buf_info->page_offset, + &buf_info->len, + GFP_ATOMIC); + if (unlikely(!buf_info->page)) { + buf_info->len = 0; return; } } sg_elem->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info)); - frag_len = min_t(u16, remain_len, ionic_rx_buf_size(buf_info)); sg_elem->len = cpu_to_le16(frag_len); remain_len -= frag_len; buf_info++; @@ -883,17 +884,12 @@ void ionic_rx_fill(struct ionic_queue *q) void ionic_rx_empty(struct ionic_queue *q) { struct ionic_rx_desc_info *desc_info; - struct ionic_buf_info *buf_info; unsigned int i, j; for (i = 0; i < q->num_descs; i++) { desc_info = &q->rx_info[i]; - for (j = 0; j < ARRAY_SIZE(desc_info->bufs); j++) { - buf_info = &desc_info->bufs[j]; - if (buf_info->page) - ionic_rx_page_free(q, buf_info); - } - + for (j = 0; j < ARRAY_SIZE(desc_info->bufs); j++) + ionic_rx_put_buf(q, &desc_info->bufs[j]); desc_info->nbufs = 0; } @@ -974,6 +970,32 @@ static void ionic_xdp_do_flush(struct ionic_cq *cq) } } +static unsigned int ionic_rx_cq_service(struct ionic_cq *cq, + unsigned int work_to_do) +{ + struct ionic_queue *q = cq->bound_q; + unsigned int work_done = 0; + struct bpf_prog *xdp_prog; + + if (work_to_do == 0) + return 0; + + xdp_prog = READ_ONCE(q->xdp_prog); + while (__ionic_rx_service(cq, xdp_prog)) { + if (cq->tail_idx == cq->num_descs - 1) + cq->done_color = !cq->done_color; + + cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1); + + if (++work_done >= work_to_do) + break; + } + ionic_rx_fill(q, xdp_prog); + ionic_xdp_do_flush(cq); + + return work_done; +} + int ionic_rx_napi(struct napi_struct *napi, int budget) { struct ionic_qcq *qcq = napi_to_qcq(napi); @@ -984,12 +1006,8 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) if (unlikely(!budget)) return budget; - work_done = ionic_cq_service(cq, budget, - ionic_rx_service, NULL, NULL); - - ionic_rx_fill(cq->bound_q); + work_done = ionic_rx_cq_service(cq, budget); - ionic_xdp_do_flush(cq); if (work_done < budget && napi_complete_done(napi, work_done)) { ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; @@ -1030,12 +1048,8 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) if (unlikely(!budget)) return budget; - rx_work_done = ionic_cq_service(rxcq, budget, - ionic_rx_service, NULL, NULL); - - ionic_rx_fill(rxcq->bound_q); + rx_work_done = ionic_rx_cq_service(rxcq, budget); - ionic_xdp_do_flush(rxcq); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { ionic_dim_update(rxqcq, 0); flags |= IONIC_INTR_CRED_UNMASK; @@ -1166,7 
+1180,7 @@ static void ionic_tx_clean(struct ionic_queue *q, struct sk_buff *skb; if (desc_info->xdpf) { - ionic_xdp_tx_desc_clean(q->partner, desc_info); + ionic_xdp_tx_desc_clean(q->partner, desc_info, in_napi); stats->clean++; if (unlikely(__netif_subqueue_stopped(q->lif->netdev, q->index))) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h index 9e73e324e7a1..b2b9a2dc9eb8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h @@ -4,9 +4,11 @@ #ifndef _IONIC_TXRX_H_ #define _IONIC_TXRX_H_ +struct bpf_prog; + void ionic_tx_flush(struct ionic_cq *cq); -void ionic_rx_fill(struct ionic_queue *q); +void ionic_rx_fill(struct ionic_queue *q, struct bpf_prog *xdp_prog); void ionic_rx_empty(struct ionic_queue *q); void ionic_tx_empty(struct ionic_queue *q); int ionic_rx_napi(struct napi_struct *napi, int budget); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index ed24d6af7487..9cff0a8ffb2c 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -3185,8 +3185,7 @@ netxen_list_config_ip(struct netxen_adapter *adapter, struct list_head *head; bool ret = false; - dev = ifa->ifa_dev ? ifa->ifa_dev->dev : NULL; - + dev = ifa->ifa_dev->dev; if (dev == NULL) goto out; @@ -3379,7 +3378,7 @@ netxen_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; unsigned long ip_event; - dev = ifa->ifa_dev ? ifa->ifa_dev->dev : NULL; + dev = ifa->ifa_dev->dev; ip_event = (event == NETDEV_UP) ? NX_IP_UP : NX_IP_DOWN; recheck: if (dev == NULL) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 63e3dac4d5f7..9d6399a5c780 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -326,25 +326,18 @@ int qede_ptp_get_ts_info(struct qede_dev *edev, struct kernel_ethtool_ts_info *i struct qede_ptp *ptp = edev->ptp; if (!ptp) { - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (ptp->clock) info->phc_index = ptp_clock_index(ptp->clock); - else - info->phc_index = -1; info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index b25102fded7b..3d0b5cd978cb 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1608,7 +1608,6 @@ void qlcnic_release_tx_buffers(struct qlcnic_adapter *, struct qlcnic_host_tx_ring *); int qlcnic_check_fw_status(struct qlcnic_adapter *adapter); -void qlcnic_watchdog_task(struct work_struct *work); void qlcnic_post_rx_buffers(struct qlcnic_adapter *adapter, struct qlcnic_host_rds_ring *rds_ring, u8 ring_id); void qlcnic_set_multi(struct net_device *netdev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bcef8ab715bf..d7cdea8f604d 100644 --- 
a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2042,12 +2042,14 @@ int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *adapter, int mode) int qlcnic_83xx_config_rss(struct qlcnic_adapter *adapter, int enable) { - int err; - u32 word; struct qlcnic_cmd_args cmd; - const u64 key[] = { 0xbeac01fa6a42b73bULL, 0x8030f20c77cb2da3ULL, - 0xae7b30b4d0ca2bcbULL, 0x43a38fb04167253dULL, - 0x255b0ec26d5a56daULL }; + static const u64 key[] = { + 0xbeac01fa6a42b73bULL, 0x8030f20c77cb2da3ULL, + 0xae7b30b4d0ca2bcbULL, 0x43a38fb04167253dULL, + 0x255b0ec26d5a56daULL + }; + u32 word; + int err; err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIGURE_RSS); if (err) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index 23cd47d588e5..a55fe6ac06c7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -539,7 +539,6 @@ int qlcnic_83xx_setup_intr(struct qlcnic_adapter *); void qlcnic_83xx_get_func_no(struct qlcnic_adapter *); int qlcnic_83xx_cam_lock(struct qlcnic_adapter *); void qlcnic_83xx_cam_unlock(struct qlcnic_adapter *); -int qlcnic_send_ctrl_op(struct qlcnic_adapter *, struct qlcnic_cmd_args *, u32); void qlcnic_83xx_add_sysfs(struct qlcnic_adapter *); void qlcnic_83xx_remove_sysfs(struct qlcnic_adapter *); void qlcnic_83xx_write_crb(struct qlcnic_adapter *, char *, loff_t, size_t); @@ -577,8 +576,6 @@ int qlcnic_83xx_get_mac_address(struct qlcnic_adapter *, u8 *, u8); int qlcnic_83xx_alloc_mbx_args(struct qlcnic_cmd_args *, struct qlcnic_adapter *, u32); void qlcnic_free_mbx_args(struct qlcnic_cmd_args *); -void qlcnic_set_npar_data(struct qlcnic_adapter *, const struct qlcnic_info *, - struct qlcnic_info *); int qlcnic_83xx_config_intr_coal(struct qlcnic_adapter *, struct ethtool_coalesce *); int qlcnic_83xx_set_rx_tx_intr_coal(struct qlcnic_adapter *); @@ -590,7 +587,6 @@ irqreturn_t qlcnic_83xx_intr(int, void *); irqreturn_t qlcnic_83xx_tmp_intr(int, void *); void qlcnic_83xx_check_vf(struct qlcnic_adapter *, const struct pci_device_id *); -int qlcnic_83xx_config_default_opmode(struct qlcnic_adapter *); int qlcnic_83xx_setup_mbx_intr(struct qlcnic_adapter *); void qlcnic_83xx_free_mbx_intr(struct qlcnic_adapter *); void qlcnic_83xx_register_map(struct qlcnic_hardware_context *); @@ -602,8 +598,6 @@ int qlcnic_83xx_flash_bulk_write(struct qlcnic_adapter *, u32, u32 *, int); int qlcnic_83xx_flash_write32(struct qlcnic_adapter *, u32, u32 *); int qlcnic_83xx_lock_flash(struct qlcnic_adapter *); void qlcnic_83xx_unlock_flash(struct qlcnic_adapter *); -int qlcnic_83xx_save_flash_status(struct qlcnic_adapter *); -int qlcnic_83xx_restore_flash_status(struct qlcnic_adapter *, int); int qlcnic_83xx_read_flash_mfg_id(struct qlcnic_adapter *); int qlcnic_83xx_read_flash_descriptor_table(struct qlcnic_adapter *); int qlcnic_83xx_flash_read32(struct qlcnic_adapter *, u32, u8 *, int); @@ -616,13 +610,9 @@ void qlcnic_83xx_idc_exit(struct qlcnic_adapter *); void qlcnic_83xx_idc_request_reset(struct qlcnic_adapter *, u32); int qlcnic_83xx_lock_driver(struct qlcnic_adapter *); void qlcnic_83xx_unlock_driver(struct qlcnic_adapter *); -int qlcnic_83xx_set_default_offload_settings(struct qlcnic_adapter *); int qlcnic_83xx_idc_vnic_pf_entry(struct qlcnic_adapter *); int qlcnic_83xx_disable_vnic_mode(struct qlcnic_adapter *, int); int qlcnic_83xx_config_vnic_opmode(struct qlcnic_adapter *); -int 
qlcnic_83xx_get_vnic_vport_info(struct qlcnic_adapter *, - struct qlcnic_info *, u8); -int qlcnic_83xx_get_vnic_pf_info(struct qlcnic_adapter *, struct qlcnic_info *); int qlcnic_83xx_set_port_eswitch_status(struct qlcnic_adapter *, int, int *); void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 90df4a0909fa..b3588a1ebc25 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -4146,7 +4146,7 @@ qlcnic_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - dev = ifa->ifa_dev ? ifa->ifa_dev->dev : NULL; + dev = ifa->ifa_dev->dev; recheck: if (dev == NULL) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index f1e40aade127..4f0ddcedfa97 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -286,7 +286,7 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev) rmnet_dev->needs_free_netdev = true; rmnet_dev->ethtool_ops = &rmnet_ethtool_ops; - rmnet_dev->features |= NETIF_F_LLTX; + rmnet_dev->lltx = true; /* This perm addr will be used as interface identifier by IPv6 */ rmnet_dev->addr_assign_type = NET_ADDR_RANDOM; diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig index 03015b665f4e..8a8ea51c639e 100644 --- a/drivers/net/ethernet/realtek/Kconfig +++ b/drivers/net/ethernet/realtek/Kconfig @@ -120,4 +120,23 @@ config R8169_LEDS Optional support for controlling the NIC LED's with the netdev LED trigger. +config RTASE + tristate "Realtek Automotive Switch 9054/9068/9072/9075/9068/9071 PCIe Interface support" + depends on PCI + select CRC32 + select PAGE_POOL + help + Say Y here and it will be compiled and linked with the kernel + if you have a Realtek Ethernet adapter belonging to the + following families: + RTL9054 5GBit Ethernet + RTL9068 5GBit Ethernet + RTL9072 5GBit Ethernet + RTL9075 5GBit Ethernet + RTL9068 5GBit Ethernet + RTL9071 5GBit Ethernet + + To compile this driver as a module, choose M here: the module + will be called rtase. This is recommended. 
+ endif # NET_VENDOR_REALTEK diff --git a/drivers/net/ethernet/realtek/Makefile b/drivers/net/ethernet/realtek/Makefile index 635491d8826e..046adf503ff4 100644 --- a/drivers/net/ethernet/realtek/Makefile +++ b/drivers/net/ethernet/realtek/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_ATP) += atp.o r8169-y += r8169_main.o r8169_firmware.o r8169_phy_config.o r8169-$(CONFIG_R8169_LEDS) += r8169_leds.o obj-$(CONFIG_R8169) += r8169.o +obj-$(CONFIG_RTASE) += rtase/ diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 00882ffc7a02..e2db944e6fa8 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -69,6 +69,7 @@ enum mac_version { RTL_GIGA_MAC_VER_61, RTL_GIGA_MAC_VER_63, RTL_GIGA_MAC_VER_65, + RTL_GIGA_MAC_VER_66, RTL_GIGA_MAC_NONE }; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 714d2e804694..305ec19ccef1 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -56,6 +56,7 @@ #define FIRMWARE_8125A_3 "rtl_nic/rtl8125a-3.fw" #define FIRMWARE_8125B_2 "rtl_nic/rtl8125b-2.fw" #define FIRMWARE_8126A_2 "rtl_nic/rtl8126a-2.fw" +#define FIRMWARE_8126A_3 "rtl_nic/rtl8126a-3.fw" #define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */ #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ @@ -138,6 +139,7 @@ static const struct { /* reserve 62 for CFG_METHOD_4 in the vendor driver */ [RTL_GIGA_MAC_VER_63] = {"RTL8125B", FIRMWARE_8125B_2}, [RTL_GIGA_MAC_VER_65] = {"RTL8126A", FIRMWARE_8126A_2}, + [RTL_GIGA_MAC_VER_66] = {"RTL8126A", FIRMWARE_8126A_3}, }; static const struct pci_device_id rtl8169_pci_tbl[] = { @@ -576,7 +578,34 @@ struct rtl8169_counters { __le64 rx_broadcast; __le32 rx_multicast; __le16 tx_aborted; - __le16 tx_underun; + __le16 tx_underrun; + /* new since RTL8125 */ + __le64 tx_octets; + __le64 rx_octets; + __le64 rx_multicast64; + __le64 tx_unicast64; + __le64 tx_broadcast64; + __le64 tx_multicast64; + __le32 tx_pause_on; + __le32 tx_pause_off; + __le32 tx_pause_all; + __le32 tx_deferred; + __le32 tx_late_collision; + __le32 tx_all_collision; + __le32 tx_aborted32; + __le32 align_errors32; + __le32 rx_frame_too_long; + __le32 rx_runt; + __le32 rx_pause_on; + __le32 rx_pause_off; + __le32 rx_pause_all; + __le32 rx_unknown_opcode; + __le32 rx_mac_error; + __le32 tx_underrun32; + __le32 rx_mac_missed; + __le32 rx_tcam_dropped; + __le32 tdu; + __le32 rdu; }; struct rtl8169_tc_offsets { @@ -679,6 +708,7 @@ MODULE_FIRMWARE(FIRMWARE_8107E_2); MODULE_FIRMWARE(FIRMWARE_8125A_3); MODULE_FIRMWARE(FIRMWARE_8125B_2); MODULE_FIRMWARE(FIRMWARE_8126A_2); +MODULE_FIRMWARE(FIRMWARE_8126A_3); static inline struct device *tp_to_dev(struct rtl8169_private *tp) { @@ -1201,7 +1231,7 @@ static void rtl_writephy(struct rtl8169_private *tp, int location, int val) case RTL_GIGA_MAC_VER_31: r8168dp_2_mdio_write(tp, location, val); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_66: r8168g_mdio_write(tp, location, val); break; default: @@ -1216,7 +1246,7 @@ static int rtl_readphy(struct rtl8169_private *tp, int location) case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: return r8168dp_2_mdio_read(tp, location); - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_40 ... 
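On the r8169 side, supporting the new 8126A revision is mostly mechanical: one new enum value, one firmware name, and widening every "... RTL_GIGA_MAC_VER_65" case range to VER_66. The mac_info table that keys all of this matches the chip XID by mask, conceptually like the following (field names are assumed from the table layout above):

static enum mac_version my_match_xid(u16 xid)
{
	static const struct {
		u16 mask, val;
		enum mac_version ver;
	} tbl[] = {
		{ 0x7cf, 0x64a, RTL_GIGA_MAC_VER_66 },	/* new revision */
		{ 0x7cf, 0x649, RTL_GIGA_MAC_VER_65 },
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(tbl); i++)
		if ((xid & tbl[i].mask) == tbl[i].val)
			return tbl[i].ver;

	return RTL_GIGA_MAC_NONE;
}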
RTL_GIGA_MAC_VER_66: return r8168g_mdio_read(tp, location); default: return r8169_mdio_read(tp, location); @@ -1425,7 +1455,7 @@ static void rtl_set_d3_pll_down(struct rtl8169_private *tp, bool enable) case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30: case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_66: if (enable) RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~D3_NO_PLL_DOWN); else @@ -1592,7 +1622,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) break; case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_66: if (wolopts) rtl_mod_config2(tp, 0, PME_SIGNAL); else @@ -1841,7 +1871,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev, data[9] = le64_to_cpu(counters->rx_broadcast); data[10] = le32_to_cpu(counters->rx_multicast); data[11] = le16_to_cpu(counters->tx_aborted); - data[12] = le16_to_cpu(counters->tx_underun); + data[12] = le16_to_cpu(counters->tx_underrun); } static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -2071,6 +2101,7 @@ static void rtl_set_eee_txidle_timer(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_63: case RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_66: tp->tx_lpi_timer = timer_val; RTL_W16(tp, EEE_TXIDLE_TIMER_8125, timer_val); break; @@ -2199,6 +2230,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) enum mac_version ver; } mac_info[] = { /* 8126A family. */ + { 0x7cf, 0x64a, RTL_GIGA_MAC_VER_66 }, { 0x7cf, 0x649, RTL_GIGA_MAC_VER_65 }, /* 8125B family. */ @@ -2470,6 +2502,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) break; case RTL_GIGA_MAC_VER_63: case RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_66: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST | RX_PAUSE_SLOT_ON); break; @@ -2656,7 +2689,7 @@ static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_61: rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); break; - case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_66: RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond_2, 100, 42); @@ -2899,7 +2932,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38: rtl_eri_set_bits(tp, 0xd4, 0x0c00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_66: r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80); break; default: @@ -2913,7 +2946,7 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: rtl_eri_clear_bits(tp, 0xd4, 0x1f00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_66: r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0); break; default: @@ -2940,6 +2973,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) rtl_mod_config5(tp, 0, ASPM_en); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_66: val8 = RTL_R8(tp, INT_CFG0_8125) | INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -2950,7 +2984,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: /* reset ephy tx/rx disable timer */ r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0); /* chip can trigger L1.2 */ @@ -2962,7 +2996,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) } else { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0); break; default: @@ -2971,6 +3005,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_66: val8 = RTL_R8(tp, INT_CFG0_8125) & ~INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -3690,10 +3725,12 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) /* disable new tx descriptor format */ r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000); - if (tp->mac_version == RTL_GIGA_MAC_VER_65) + if (tp->mac_version == RTL_GIGA_MAC_VER_65 || + tp->mac_version == RTL_GIGA_MAC_VER_66) RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02); - if (tp->mac_version == RTL_GIGA_MAC_VER_65) + if (tp->mac_version == RTL_GIGA_MAC_VER_65 || + tp->mac_version == RTL_GIGA_MAC_VER_66) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400); else if (tp->mac_version == RTL_GIGA_MAC_VER_63) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0200); @@ -3711,7 +3748,8 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030); r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000); r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001); - if (tp->mac_version == RTL_GIGA_MAC_VER_65) + if (tp->mac_version == RTL_GIGA_MAC_VER_65 || + tp->mac_version == RTL_GIGA_MAC_VER_66) r8168_mac_ocp_modify(tp, 0xea1c, 0x0300, 0x0000); else r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000); @@ -3825,6 +3863,7 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2, [RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b, [RTL_GIGA_MAC_VER_65] = rtl_hw_start_8126a, + [RTL_GIGA_MAC_VER_66] = rtl_hw_start_8126a, }; if (hw_configs[tp->mac_version]) @@ -3845,6 +3884,7 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp) break; case RTL_GIGA_MAC_VER_63: case RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_66: for (i = 0xa00; i < 0xa80; i += 4) RTL_W32(tp, i, 0); RTL_W16(tp, INT_CFG1_8125, 0x0000); @@ -4073,7 +4113,7 @@ static void rtl8169_cleanup(struct rtl8169_private *tp) RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_66: rtl_enable_rxdvgate(tp); fsleep(2000); break; @@ -4224,7 +4264,7 @@ static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp, switch (tp->mac_version) { case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: padto = max_t(unsigned int, padto, ETH_ZLEN); break; default: @@ -4349,7 +4389,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(!rtl_tx_slots_avail(tp))) { if (net_ratelimit()) netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); - goto err_stop_0; + netif_stop_queue(dev); + return NETDEV_TX_BUSY; } opts[1] = rtl8169_tx_vlan_tag(skb); @@ -4405,11 +4446,6 @@ err_dma_0: dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; - -err_stop_0: - netif_stop_queue(dev); - dev->stats.tx_dropped++; - return NETDEV_TX_BUSY; } static unsigned int rtl_last_frag_len(struct sk_buff *skb) @@ -5261,7 +5297,7 @@ static void rtl_hw_initialize(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48: rtl_hw_init_8168g(tp); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_65: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: rtl_hw_init_8125(tp); break; default: diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index 1f74317beb88..cf29b1208482 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -1060,6 +1060,7 @@ static void rtl8125a_2_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); rtl8168g_enable_gphy_10m(phydev); + rtl8168g_disable_aldps(phydev); rtl8125a_config_eee_phy(phydev); } @@ -1099,6 +1100,7 @@ static void rtl8125b_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xbf8, 0x12, 0xe000, 0xa000); rtl8125_legacy_force_mode(phydev); + rtl8168g_disable_aldps(phydev); rtl8125b_config_eee_phy(phydev); } @@ -1159,6 +1161,7 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config, [RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config, [RTL_GIGA_MAC_VER_65] = rtl8126a_hw_phy_config, + [RTL_GIGA_MAC_VER_66] = rtl8126a_hw_phy_config, }; if (phy_configs[ver]) diff --git a/drivers/net/ethernet/realtek/rtase/Makefile b/drivers/net/ethernet/realtek/rtase/Makefile new file mode 100644 index 000000000000..ba3d8550f9e6 --- /dev/null +++ b/drivers/net/ethernet/realtek/rtase/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# Copyright(c) 2024 Realtek Semiconductor Corp. All rights reserved. + +# +# Makefile for the Realtek PCIe driver +# + +obj-$(CONFIG_RTASE) += rtase.o + +rtase-objs := rtase_main.o diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h new file mode 100644 index 000000000000..583c33930f88 --- /dev/null +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * rtase is the Linux device driver released for Realtek Automotive Switch + * controllers with PCI-Express interface. + * + * Copyright(c) 2024 Realtek Semiconductor Corp. 
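The rtl8169_start_xmit() change above corrects the ring-full path: NETDEV_TX_BUSY means "requeue this skb", so the old err_stop_0 label wrongly counted it as tx_dropped. The rule for any .ndo_start_xmit, sketched with a hypothetical slots helper:

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (unlikely(!my_tx_slots_avail(dev))) {
		netif_stop_queue(dev);
		/* the skb stays owned by the stack and will be
		 * resubmitted: do not free it and do not bump
		 * tx_dropped here
		 */
		return NETDEV_TX_BUSY;
	}

	/* ... map buffers, write descriptors, ring the doorbell ... */
	return NETDEV_TX_OK;
}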
+ */ + +#ifndef RTASE_H +#define RTASE_H + +#define RTASE_HW_VER_MASK 0x7C800000 + +#define RTASE_RX_DMA_BURST_256 4 +#define RTASE_TX_DMA_BURST_UNLIMITED 7 + +#define RTASE_RX_BUF_SIZE (PAGE_SIZE - \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define RTASE_MAX_JUMBO_SIZE (RTASE_RX_BUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN) + +/* 3 means InterFrameGap = the shortest one */ +#define RTASE_INTERFRAMEGAP 0x03 + +#define RTASE_REGS_SIZE 256 +#define RTASE_PCI_REGS_SIZE 0x100 + +#define RTASE_MULTICAST_FILTER_MASK GENMASK(30, 26) + +#define RTASE_VLAN_FILTER_ENTRY_NUM 32 +#define RTASE_NUM_TX_QUEUE 8 +#define RTASE_NUM_RX_QUEUE 4 + +#define RTASE_TXQ_CTRL 1 +#define RTASE_FUNC_TXQ_NUM 1 +#define RTASE_FUNC_RXQ_NUM 1 +#define RTASE_INTERRUPT_NUM 1 + +#define RTASE_MITI_TIME_COUNT_MASK GENMASK(3, 0) +#define RTASE_MITI_TIME_UNIT_MASK GENMASK(7, 4) +#define RTASE_MITI_DEFAULT_TIME 128 +#define RTASE_MITI_MAX_TIME 491520 +#define RTASE_MITI_PKT_NUM_COUNT_MASK GENMASK(11, 8) +#define RTASE_MITI_PKT_NUM_UNIT_MASK GENMASK(13, 12) +#define RTASE_MITI_DEFAULT_PKT_NUM 64 +#define RTASE_MITI_MAX_PKT_NUM_IDX 3 +#define RTASE_MITI_MAX_PKT_NUM_UNIT 16 +#define RTASE_MITI_MAX_PKT_NUM 240 +#define RTASE_MITI_COUNT_BIT_NUM 4 + +#define RTASE_NUM_MSIX 4 + +#define RTASE_DWORD_MOD 16 + +/*****************************************************************************/ +enum rtase_registers { + RTASE_MAC0 = 0x0000, + RTASE_MAC4 = 0x0004, + RTASE_MAR0 = 0x0008, + RTASE_MAR1 = 0x000C, + RTASE_DTCCR0 = 0x0010, + RTASE_DTCCR4 = 0x0014, +#define RTASE_COUNTER_RESET BIT(0) +#define RTASE_COUNTER_DUMP BIT(3) + + RTASE_FCR = 0x0018, +#define RTASE_FCR_RXQ_MASK GENMASK(5, 4) + + RTASE_LBK_CTRL = 0x001A, +#define RTASE_LBK_ATLD BIT(1) +#define RTASE_LBK_CLR BIT(0) + + RTASE_TX_DESC_ADDR0 = 0x0020, + RTASE_TX_DESC_ADDR4 = 0x0024, + RTASE_TX_DESC_COMMAND = 0x0028, +#define RTASE_TX_DESC_CMD_CS BIT(15) +#define RTASE_TX_DESC_CMD_WE BIT(14) + + RTASE_BOOT_CTL = 0x6004, + RTASE_CLKSW_SET = 0x6018, + + RTASE_CHIP_CMD = 0x0037, +#define RTASE_STOP_REQ BIT(7) +#define RTASE_STOP_REQ_DONE BIT(6) +#define RTASE_RE BIT(3) +#define RTASE_TE BIT(2) + + RTASE_IMR0 = 0x0038, + RTASE_ISR0 = 0x003C, +#define RTASE_TOK7 BIT(30) +#define RTASE_TOK6 BIT(28) +#define RTASE_TOK5 BIT(26) +#define RTASE_TOK4 BIT(24) +#define RTASE_FOVW BIT(6) +#define RTASE_RDU BIT(4) +#define RTASE_TOK BIT(2) +#define RTASE_ROK BIT(0) + + RTASE_IMR1 = 0x0800, + RTASE_ISR1 = 0x0802, +#define RTASE_Q_TOK BIT(4) +#define RTASE_Q_RDU BIT(1) +#define RTASE_Q_ROK BIT(0) + + RTASE_EPHY_ISR = 0x6014, + RTASE_EPHY_IMR = 0x6016, + + RTASE_TX_CONFIG_0 = 0x0040, +#define RTASE_TX_INTER_FRAME_GAP_MASK GENMASK(25, 24) + /* DMA burst value (0-7) is shift this many bits */ +#define RTASE_TX_DMA_MASK GENMASK(10, 8) + + RTASE_RX_CONFIG_0 = 0x0044, +#define RTASE_RX_SINGLE_FETCH BIT(14) +#define RTASE_RX_SINGLE_TAG BIT(13) +#define RTASE_RX_MX_DMA_MASK GENMASK(10, 8) +#define RTASE_ACPT_FLOW BIT(7) +#define RTASE_ACCEPT_ERR BIT(5) +#define RTASE_ACCEPT_RUNT BIT(4) +#define RTASE_ACCEPT_BROADCAST BIT(3) +#define RTASE_ACCEPT_MULTICAST BIT(2) +#define RTASE_ACCEPT_MYPHYS BIT(1) +#define RTASE_ACCEPT_ALLPHYS BIT(0) +#define RTASE_ACCEPT_MASK (RTASE_ACPT_FLOW | RTASE_ACCEPT_ERR | \ + RTASE_ACCEPT_RUNT | RTASE_ACCEPT_BROADCAST | \ + RTASE_ACCEPT_MULTICAST | RTASE_ACCEPT_MYPHYS | \ + RTASE_ACCEPT_ALLPHYS) + + RTASE_RX_CONFIG_1 = 0x0046, +#define RTASE_RX_MAX_FETCH_DESC_MASK GENMASK(15, 11) +#define RTASE_RX_NEW_DESC_FORMAT_EN BIT(8) +#define RTASE_OUTER_VLAN_DETAG_EN BIT(7) +#define 
RTASE_INNER_VLAN_DETAG_EN BIT(6) +#define RTASE_PCIE_NEW_FLOW BIT(2) +#define RTASE_PCIE_RELOAD_EN BIT(0) + + RTASE_EEM = 0x0050, +#define RTASE_EEM_UNLOCK 0xC0 + + RTASE_TDFNR = 0x0057, + RTASE_TPPOLL = 0x0090, + RTASE_PDR = 0x00B0, + RTASE_FIFOR = 0x00D3, +#define RTASE_TX_FIFO_EMPTY BIT(5) +#define RTASE_RX_FIFO_EMPTY BIT(4) + + RTASE_RMS = 0x00DA, + RTASE_CPLUS_CMD = 0x00E0, +#define RTASE_FORCE_RXFLOW_EN BIT(11) +#define RTASE_FORCE_TXFLOW_EN BIT(10) +#define RTASE_RX_CHKSUM BIT(5) + + RTASE_Q0_RX_DESC_ADDR0 = 0x00E4, + RTASE_Q0_RX_DESC_ADDR4 = 0x00E8, + RTASE_Q1_RX_DESC_ADDR0 = 0x4000, + RTASE_Q1_RX_DESC_ADDR4 = 0x4004, + RTASE_MTPS = 0x00EC, +#define RTASE_TAG_NUM_SEL_MASK GENMASK(10, 8) + + RTASE_MISC = 0x00F2, +#define RTASE_RX_DV_GATE_EN BIT(3) + + RTASE_TFUN_CTRL = 0x0400, +#define RTASE_TX_NEW_DESC_FORMAT_EN BIT(0) + + RTASE_TX_CONFIG_1 = 0x203E, +#define RTASE_TC_MODE_MASK GENMASK(11, 10) + + RTASE_TOKSEL = 0x2046, + RTASE_RFIFONFULL = 0x4406, + RTASE_INT_MITI_TX = 0x0A00, + RTASE_INT_MITI_RX = 0x0A80, + + RTASE_VLAN_ENTRY_0 = 0xAC80, +}; + +enum rtase_desc_status_bit { + RTASE_DESC_OWN = BIT(31), /* Descriptor is owned by NIC */ + RTASE_RING_END = BIT(30), /* End of descriptor ring */ +}; + +enum rtase_sw_flag_content { + RTASE_SWF_MSI_ENABLED = BIT(1), + RTASE_SWF_MSIX_ENABLED = BIT(2), +}; + +#define RSVD_MASK 0x3FFFC000 + +struct rtase_tx_desc { + __le32 opts1; + __le32 opts2; + __le64 addr; + __le32 opts3; + __le32 reserved1; + __le32 reserved2; + __le32 reserved3; +} __packed; + +/*------ offset 0 of tx descriptor ------*/ +#define RTASE_TX_FIRST_FRAG BIT(29) /* Tx First segment of a packet */ +#define RTASE_TX_LAST_FRAG BIT(28) /* Tx Final segment of a packet */ +#define RTASE_GIANT_SEND_V4 BIT(26) /* TCP Giant Send Offload V4 (GSOv4) */ +#define RTASE_GIANT_SEND_V6 BIT(25) /* TCP Giant Send Offload V6 (GSOv6) */ +#define RTASE_TX_VLAN_TAG BIT(17) /* Add VLAN tag */ + +/*------ offset 4 of tx descriptor ------*/ +#define RTASE_TX_UDPCS_C BIT(31) /* Calculate UDP/IP checksum */ +#define RTASE_TX_TCPCS_C BIT(30) /* Calculate TCP/IP checksum */ +#define RTASE_TX_IPCS_C BIT(29) /* Calculate IP checksum */ +#define RTASE_TX_IPV6F_C BIT(28) /* Indicate it is an IPv6 packet */ + +union rtase_rx_desc { + struct { + __le64 header_buf_addr; + __le32 reserved1; + __le32 opts_header_len; + __le64 addr; + __le32 reserved2; + __le32 opts1; + } __packed desc_cmd; + + struct { + __le32 reserved1; + __le32 reserved2; + __le32 rss; + __le32 opts4; + __le32 reserved3; + __le32 opts3; + __le32 opts2; + __le32 opts1; + } __packed desc_status; +} __packed; + +/*------ offset 28 of rx descriptor ------*/ +#define RTASE_RX_FIRST_FRAG BIT(25) /* Rx First segment of a packet */ +#define RTASE_RX_LAST_FRAG BIT(24) /* Rx Final segment of a packet */ +#define RTASE_RX_RES BIT(20) +#define RTASE_RX_RUNT BIT(19) +#define RTASE_RX_RWT BIT(18) +#define RTASE_RX_CRC BIT(16) +#define RTASE_RX_V6F BIT(31) +#define RTASE_RX_V4F BIT(30) +#define RTASE_RX_UDPT BIT(29) +#define RTASE_RX_TCPT BIT(28) +#define RTASE_RX_IPF BIT(26) /* IP checksum failed */ +#define RTASE_RX_UDPF BIT(25) /* UDP/IP checksum failed */ +#define RTASE_RX_TCPF BIT(24) /* TCP/IP checksum failed */ +#define RTASE_RX_VLAN_TAG BIT(16) /* VLAN tag available */ + +#define RTASE_NUM_DESC 1024 +#define RTASE_TX_BUDGET_DEFAULT 256 +#define RTASE_TX_RING_DESC_SIZE (RTASE_NUM_DESC * sizeof(struct rtase_tx_desc)) +#define RTASE_RX_RING_DESC_SIZE (RTASE_NUM_DESC * sizeof(union rtase_rx_desc)) +#define RTASE_TX_STOP_THRS (MAX_SKB_FRAGS + 1) 
+#define RTASE_TX_START_THRS (2 * RTASE_TX_STOP_THRS) +#define RTASE_VLAN_TAG_MASK GENMASK(15, 0) +#define RTASE_RX_PKT_SIZE_MASK GENMASK(13, 0) + +#define RTASE_IVEC_NAME_SIZE (IFNAMSIZ + 10) + +struct rtase_int_vector { + struct rtase_private *tp; + unsigned int irq; + char name[RTASE_IVEC_NAME_SIZE]; + u16 index; + u16 imr_addr; + u16 isr_addr; + u32 imr; + struct list_head ring_list; + struct napi_struct napi; + int (*poll)(struct napi_struct *napi, int budget); +}; + +struct rtase_ring { + struct rtase_int_vector *ivec; + void *desc; + dma_addr_t phy_addr; + u32 cur_idx; + u32 dirty_idx; + u16 index; + + struct sk_buff *skbuff[RTASE_NUM_DESC]; + void *data_buf[RTASE_NUM_DESC]; + union { + u32 len[RTASE_NUM_DESC]; + dma_addr_t data_phy_addr[RTASE_NUM_DESC]; + } mis; + + struct list_head ring_entry; + int (*ring_handler)(struct rtase_ring *ring, int budget); + u64 alloc_fail; +}; + +struct rtase_stats { + u64 tx_dropped; + u64 rx_dropped; + u64 multicast; + u64 rx_errors; + u64 rx_length_errors; + u64 rx_crc_errors; +}; + +struct rtase_private { + void __iomem *mmio_addr; + u32 sw_flag; + + struct pci_dev *pdev; + struct net_device *dev; + u32 rx_buf_sz; + + struct page_pool *page_pool; + struct rtase_ring tx_ring[RTASE_NUM_TX_QUEUE]; + struct rtase_ring rx_ring[RTASE_NUM_RX_QUEUE]; + struct rtase_counters *tally_vaddr; + dma_addr_t tally_paddr; + + u32 vlan_filter_ctrl; + u16 vlan_filter_vid[RTASE_VLAN_FILTER_ENTRY_NUM]; + + struct msix_entry msix_entry[RTASE_NUM_MSIX]; + struct rtase_int_vector int_vector[RTASE_NUM_MSIX]; + + struct rtase_stats stats; + + u16 tx_queue_ctrl; + u16 func_tx_queue_num; + u16 func_rx_queue_num; + u16 int_nums; + u16 tx_int_mit; + u16 rx_int_mit; +}; + +#define RTASE_LSO_64K 64000 + +#define RTASE_NIC_MAX_PHYS_BUF_COUNT_LSO2 (16 * 4) + +#define RTASE_TCPHO_MASK GENMASK(24, 18) + +#define RTASE_MSS_MASK GENMASK(28, 18) + +#endif /* RTASE_H */ diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c new file mode 100644 index 000000000000..f8777b7663d3 --- /dev/null +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -0,0 +1,2288 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * rtase is the Linux device driver released for Realtek Automotive Switch + * controllers with PCI-Express interface. + * + * Copyright(c) 2024 Realtek Semiconductor Corp. + * + * Below is a simplified block diagram of the chip and its relevant interfaces. + * + * ************************* + * * * + * * CPU network device * + * * * + * * +-------------+ * + * * | PCIE Host | * + * ***********++************ + * || + * PCIE + * || + * ********************++********************** + * * | PCIE Endpoint | * + * * +---------------+ * + * * | GMAC | * + * * +--++--+ Realtek * + * * || RTL90xx Series * + * * || * + * * +-------------++----------------+ * + * * | | MAC | | * + * * | +-----+ | * + * * | | * + * * | Ethernet Switch Core | * + * * | | * + * * | +-----+ +-----+ | * + * * | | MAC |...........| MAC | | * + * * +---+-----+-----------+-----+---+ * + * * | PHY |...........| PHY | * + * * +--++-+ +--++-+ * + * *************||****************||*********** + * + * The block of the Realtek RTL90xx series is our entire chip architecture, + * the GMAC is connected to the switch core, and there is no PHY in between. + * In addition, this driver is mainly used to control GMAC, but does not + * control the switch core, so it is not the same as DSA. Linux only plays + * the role of a normal leaf node in this model. 
+ */
+
+#include <linux/crc32.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/mdio.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/prefetch.h>
+#include <linux/rtnetlink.h>
+#include <linux/tcp.h>
+#include <asm/irq.h>
+#include <net/ip6_checksum.h>
+#include <net/netdev_queues.h>
+#include <net/page_pool/helpers.h>
+#include <net/pkt_cls.h>
+
+#include "rtase.h"
+
+#define RTK_OPTS1_DEBUG_VALUE 0x0BADBEEF
+#define RTK_MAGIC_NUMBER 0x0BADBADBADBADBAD
+
+static const struct pci_device_id rtase_pci_tbl[] = {
+ {PCI_VDEVICE(REALTEK, 0x906A)},
+ {}
+};
+
+MODULE_DEVICE_TABLE(pci, rtase_pci_tbl);
+
+MODULE_AUTHOR("Realtek ARD Software Team");
+MODULE_DESCRIPTION("Network Driver for the PCIe interface of Realtek Automotive Ethernet Switch");
+MODULE_LICENSE("Dual BSD/GPL");
+
+struct rtase_counters {
+ __le64 tx_packets;
+ __le64 rx_packets;
+ __le64 tx_errors;
+ __le32 rx_errors;
+ __le16 rx_missed;
+ __le16 align_errors;
+ __le32 tx_one_collision;
+ __le32 tx_multi_collision;
+ __le64 rx_unicast;
+ __le64 rx_broadcast;
+ __le32 rx_multicast;
+ __le16 tx_aborted;
+ __le16 tx_underrun;
+} __packed;
+
+static void rtase_w8(const struct rtase_private *tp, u16 reg, u8 val8)
+{
+ writeb(val8, tp->mmio_addr + reg);
+}
+
+static void rtase_w16(const struct rtase_private *tp, u16 reg, u16 val16)
+{
+ writew(val16, tp->mmio_addr + reg);
+}
+
+static void rtase_w32(const struct rtase_private *tp, u16 reg, u32 val32)
+{
+ writel(val32, tp->mmio_addr + reg);
+}
+
+static u8 rtase_r8(const struct rtase_private *tp, u16 reg)
+{
+ return readb(tp->mmio_addr + reg);
+}
+
+static u16 rtase_r16(const struct rtase_private *tp, u16 reg)
+{
+ return readw(tp->mmio_addr + reg);
+}
+
+static u32 rtase_r32(const struct rtase_private *tp, u16 reg)
+{
+ return readl(tp->mmio_addr + reg);
+}
+
+static void rtase_free_desc(struct rtase_private *tp)
+{
+ struct pci_dev *pdev = tp->pdev;
+ u32 i;
+
+ for (i = 0; i < tp->func_tx_queue_num; i++) {
+ if (!tp->tx_ring[i].desc)
+ continue;
+
+ dma_free_coherent(&pdev->dev, RTASE_TX_RING_DESC_SIZE,
+ tp->tx_ring[i].desc,
+ tp->tx_ring[i].phy_addr);
+ tp->tx_ring[i].desc = NULL;
+ }
+
+ for (i = 0; i < tp->func_rx_queue_num; i++) {
+ if (!tp->rx_ring[i].desc)
+ continue;
+
+ dma_free_coherent(&pdev->dev, RTASE_RX_RING_DESC_SIZE,
+ tp->rx_ring[i].desc,
+ tp->rx_ring[i].phy_addr);
+ tp->rx_ring[i].desc = NULL;
+ }
+}
+
+static int rtase_alloc_desc(struct rtase_private *tp)
+{
+ struct pci_dev *pdev = tp->pdev;
+ u32 i;
+
+ /* rx and tx descriptors need 256-byte alignment.
+ * dma_alloc_coherent provides more.
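 * (For scale, from the structures above: rtase_tx_desc and union
 * rtase_rx_desc are both 32 bytes, so a full ring of RTASE_NUM_DESC =
 * 1024 entries is exactly 32 KiB, and a coherent allocation of that
 * size comes back page-aligned, well beyond the required 256 bytes.)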
+ */ + for (i = 0; i < tp->func_tx_queue_num; i++) { + tp->tx_ring[i].desc = + dma_alloc_coherent(&pdev->dev, + RTASE_TX_RING_DESC_SIZE, + &tp->tx_ring[i].phy_addr, + GFP_KERNEL); + if (!tp->tx_ring[i].desc) + goto err_out; + } + + for (i = 0; i < tp->func_rx_queue_num; i++) { + tp->rx_ring[i].desc = + dma_alloc_coherent(&pdev->dev, + RTASE_RX_RING_DESC_SIZE, + &tp->rx_ring[i].phy_addr, + GFP_KERNEL); + if (!tp->rx_ring[i].desc) + goto err_out; + } + + return 0; + +err_out: + rtase_free_desc(tp); + return -ENOMEM; +} + +static void rtase_unmap_tx_skb(struct pci_dev *pdev, u32 len, + struct rtase_tx_desc *desc) +{ + dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), len, + DMA_TO_DEVICE); + desc->opts1 = cpu_to_le32(RTK_OPTS1_DEBUG_VALUE); + desc->opts2 = 0x00; + desc->addr = cpu_to_le64(RTK_MAGIC_NUMBER); +} + +static void rtase_tx_clear_range(struct rtase_ring *ring, u32 start, u32 n) +{ + struct rtase_tx_desc *desc_base = ring->desc; + struct rtase_private *tp = ring->ivec->tp; + u32 i; + + for (i = 0; i < n; i++) { + u32 entry = (start + i) % RTASE_NUM_DESC; + struct rtase_tx_desc *desc = desc_base + entry; + u32 len = ring->mis.len[entry]; + struct sk_buff *skb; + + if (len == 0) + continue; + + rtase_unmap_tx_skb(tp->pdev, len, desc); + ring->mis.len[entry] = 0; + skb = ring->skbuff[entry]; + if (!skb) + continue; + + tp->stats.tx_dropped++; + dev_kfree_skb_any(skb); + ring->skbuff[entry] = NULL; + } +} + +static void rtase_tx_clear(struct rtase_private *tp) +{ + struct rtase_ring *ring; + u16 i; + + for (i = 0; i < tp->func_tx_queue_num; i++) { + ring = &tp->tx_ring[i]; + rtase_tx_clear_range(ring, ring->dirty_idx, RTASE_NUM_DESC); + ring->cur_idx = 0; + ring->dirty_idx = 0; + } +} + +static void rtase_mark_to_asic(union rtase_rx_desc *desc, u32 rx_buf_sz) +{ + u32 eor = le32_to_cpu(desc->desc_cmd.opts1) & RTASE_RING_END; + + desc->desc_status.opts2 = 0; + /* force memory writes to complete before releasing descriptor */ + dma_wmb(); + WRITE_ONCE(desc->desc_cmd.opts1, + cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz)); +} + +static u32 rtase_tx_avail(struct rtase_ring *ring) +{ + return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC - + READ_ONCE(ring->cur_idx); +} + +static int tx_handler(struct rtase_ring *ring, int budget) +{ + const struct rtase_private *tp = ring->ivec->tp; + struct net_device *dev = tp->dev; + u32 dirty_tx, tx_left; + u32 bytes_compl = 0; + u32 pkts_compl = 0; + int workdone = 0; + + dirty_tx = ring->dirty_idx; + tx_left = READ_ONCE(ring->cur_idx) - dirty_tx; + + while (tx_left > 0) { + u32 entry = dirty_tx % RTASE_NUM_DESC; + struct rtase_tx_desc *desc = ring->desc + + sizeof(struct rtase_tx_desc) * entry; + u32 status; + + status = le32_to_cpu(desc->opts1); + + if (status & RTASE_DESC_OWN) + break; + + rtase_unmap_tx_skb(tp->pdev, ring->mis.len[entry], desc); + ring->mis.len[entry] = 0; + if (ring->skbuff[entry]) { + pkts_compl++; + bytes_compl += ring->skbuff[entry]->len; + napi_consume_skb(ring->skbuff[entry], budget); + ring->skbuff[entry] = NULL; + } + + dirty_tx++; + tx_left--; + workdone++; + + if (workdone == RTASE_TX_BUDGET_DEFAULT) + break; + } + + if (ring->dirty_idx != dirty_tx) { + dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl); + WRITE_ONCE(ring->dirty_idx, dirty_tx); + + netif_subqueue_completed_wake(dev, ring->index, pkts_compl, + bytes_compl, + rtase_tx_avail(ring), + RTASE_TX_START_THRS); + + if (ring->cur_idx != dirty_tx) + rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index)); + } + + return 0; +} + +static void rtase_tx_desc_init(struct 
rtase_private *tp, u16 idx) +{ + struct rtase_ring *ring = &tp->tx_ring[idx]; + struct rtase_tx_desc *desc; + u32 i; + + memset(ring->desc, 0x0, RTASE_TX_RING_DESC_SIZE); + memset(ring->skbuff, 0x0, sizeof(ring->skbuff)); + ring->cur_idx = 0; + ring->dirty_idx = 0; + ring->index = idx; + ring->alloc_fail = 0; + + for (i = 0; i < RTASE_NUM_DESC; i++) { + ring->mis.len[i] = 0; + if ((RTASE_NUM_DESC - 1) == i) { + desc = ring->desc + sizeof(struct rtase_tx_desc) * i; + desc->opts1 = cpu_to_le32(RTASE_RING_END); + } + } + + ring->ring_handler = tx_handler; + if (idx < 4) { + ring->ivec = &tp->int_vector[idx]; + list_add_tail(&ring->ring_entry, + &tp->int_vector[idx].ring_list); + } else { + ring->ivec = &tp->int_vector[0]; + list_add_tail(&ring->ring_entry, &tp->int_vector[0].ring_list); + } +} + +static void rtase_map_to_asic(union rtase_rx_desc *desc, dma_addr_t mapping, + u32 rx_buf_sz) +{ + desc->desc_cmd.addr = cpu_to_le64(mapping); + + rtase_mark_to_asic(desc, rx_buf_sz); +} + +static void rtase_make_unusable_by_asic(union rtase_rx_desc *desc) +{ + desc->desc_cmd.addr = cpu_to_le64(RTK_MAGIC_NUMBER); + desc->desc_cmd.opts1 &= ~cpu_to_le32(RTASE_DESC_OWN | RSVD_MASK); +} + +static int rtase_alloc_rx_data_buf(struct rtase_ring *ring, + void **p_data_buf, + union rtase_rx_desc *desc, + dma_addr_t *rx_phy_addr) +{ + struct rtase_int_vector *ivec = ring->ivec; + const struct rtase_private *tp = ivec->tp; + dma_addr_t mapping; + struct page *page; + + page = page_pool_dev_alloc_pages(tp->page_pool); + if (!page) { + ring->alloc_fail++; + goto err_out; + } + + *p_data_buf = page_address(page); + mapping = page_pool_get_dma_addr(page); + *rx_phy_addr = mapping; + rtase_map_to_asic(desc, mapping, tp->rx_buf_sz); + + return 0; + +err_out: + rtase_make_unusable_by_asic(desc); + + return -ENOMEM; +} + +static u32 rtase_rx_ring_fill(struct rtase_ring *ring, u32 ring_start, + u32 ring_end) +{ + union rtase_rx_desc *desc_base = ring->desc; + u32 cur; + + for (cur = ring_start; ring_end - cur > 0; cur++) { + u32 i = cur % RTASE_NUM_DESC; + union rtase_rx_desc *desc = desc_base + i; + int ret; + + if (ring->data_buf[i]) + continue; + + ret = rtase_alloc_rx_data_buf(ring, &ring->data_buf[i], desc, + &ring->mis.data_phy_addr[i]); + if (ret) + break; + } + + return cur - ring_start; +} + +static void rtase_mark_as_last_descriptor(union rtase_rx_desc *desc) +{ + desc->desc_cmd.opts1 |= cpu_to_le32(RTASE_RING_END); +} + +static void rtase_rx_ring_clear(struct page_pool *page_pool, + struct rtase_ring *ring) +{ + union rtase_rx_desc *desc; + struct page *page; + u32 i; + + for (i = 0; i < RTASE_NUM_DESC; i++) { + desc = ring->desc + sizeof(union rtase_rx_desc) * i; + page = virt_to_head_page(ring->data_buf[i]); + + if (ring->data_buf[i]) + page_pool_put_full_page(page_pool, page, true); + + rtase_make_unusable_by_asic(desc); + } +} + +static int rtase_fragmented_frame(u32 status) +{ + return (status & (RTASE_RX_FIRST_FRAG | RTASE_RX_LAST_FRAG)) != + (RTASE_RX_FIRST_FRAG | RTASE_RX_LAST_FRAG); +} + +static void rtase_rx_csum(const struct rtase_private *tp, struct sk_buff *skb, + const union rtase_rx_desc *desc) +{ + u32 opts2 = le32_to_cpu(desc->desc_status.opts2); + + /* rx csum offload */ + if (((opts2 & RTASE_RX_V4F) && !(opts2 & RTASE_RX_IPF)) || + (opts2 & RTASE_RX_V6F)) { + if (((opts2 & RTASE_RX_TCPT) && !(opts2 & RTASE_RX_TCPF)) || + ((opts2 & RTASE_RX_UDPT) && !(opts2 & RTASE_RX_UDPF))) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + } else { + skb->ip_summed = 
CHECKSUM_NONE; + } +} + +static void rtase_rx_vlan_skb(union rtase_rx_desc *desc, struct sk_buff *skb) +{ + u32 opts2 = le32_to_cpu(desc->desc_status.opts2); + + if (!(opts2 & RTASE_RX_VLAN_TAG)) + return; + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + swab16(opts2 & RTASE_VLAN_TAG_MASK)); +} + +static void rtase_rx_skb(const struct rtase_ring *ring, struct sk_buff *skb) +{ + struct rtase_int_vector *ivec = ring->ivec; + + napi_gro_receive(&ivec->napi, skb); +} + +static int rx_handler(struct rtase_ring *ring, int budget) +{ + union rtase_rx_desc *desc_base = ring->desc; + u32 pkt_size, cur_rx, delta, entry, status; + struct rtase_private *tp = ring->ivec->tp; + struct net_device *dev = tp->dev; + union rtase_rx_desc *desc; + struct sk_buff *skb; + int workdone = 0; + + cur_rx = ring->cur_idx; + entry = cur_rx % RTASE_NUM_DESC; + desc = &desc_base[entry]; + + while (workdone < budget) { + status = le32_to_cpu(desc->desc_status.opts1); + + if (status & RTASE_DESC_OWN) + break; + + /* This barrier is needed to keep us from reading + * any other fields out of the rx descriptor until + * we know the status of RTASE_DESC_OWN + */ + dma_rmb(); + + if (unlikely(status & RTASE_RX_RES)) { + if (net_ratelimit()) + netdev_warn(dev, "Rx ERROR. status = %08x\n", + status); + + tp->stats.rx_errors++; + + if (status & (RTASE_RX_RWT | RTASE_RX_RUNT)) + tp->stats.rx_length_errors++; + + if (status & RTASE_RX_CRC) + tp->stats.rx_crc_errors++; + + if (dev->features & NETIF_F_RXALL) + goto process_pkt; + + rtase_mark_to_asic(desc, tp->rx_buf_sz); + goto skip_process_pkt; + } + +process_pkt: + pkt_size = status & RTASE_RX_PKT_SIZE_MASK; + if (likely(!(dev->features & NETIF_F_RXFCS))) + pkt_size -= ETH_FCS_LEN; + + /* The driver does not support incoming fragmented frames. + * They are seen as a symptom of over-mtu sized frames. 
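 * (A note on the reasoning, derived from this patch's own constants:
 * RMS is programmed to rx_buf_sz and max_mtu is capped at
 * RTASE_MAX_JUMBO_SIZE, so any frame within the configured MTU fits in
 * a single rx buffer; a completion without both FIRST and LAST flags
 * set can only be an oversized frame.)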
+ */ + if (unlikely(rtase_fragmented_frame(status))) { + tp->stats.rx_dropped++; + tp->stats.rx_length_errors++; + rtase_mark_to_asic(desc, tp->rx_buf_sz); + goto skip_process_pkt; + } + + dma_sync_single_for_cpu(&tp->pdev->dev, + ring->mis.data_phy_addr[entry], + tp->rx_buf_sz, DMA_FROM_DEVICE); + + skb = build_skb(ring->data_buf[entry], PAGE_SIZE); + if (!skb) { + tp->stats.rx_dropped++; + rtase_mark_to_asic(desc, tp->rx_buf_sz); + goto skip_process_pkt; + } + ring->data_buf[entry] = NULL; + + if (dev->features & NETIF_F_RXCSUM) + rtase_rx_csum(tp, skb, desc); + + skb_put(skb, pkt_size); + skb_mark_for_recycle(skb); + skb->protocol = eth_type_trans(skb, dev); + + if (skb->pkt_type == PACKET_MULTICAST) + tp->stats.multicast++; + + rtase_rx_vlan_skb(desc, skb); + rtase_rx_skb(ring, skb); + + dev_sw_netstats_rx_add(dev, pkt_size); + +skip_process_pkt: + workdone++; + cur_rx++; + entry = cur_rx % RTASE_NUM_DESC; + desc = ring->desc + sizeof(union rtase_rx_desc) * entry; + } + + ring->cur_idx = cur_rx; + delta = rtase_rx_ring_fill(ring, ring->dirty_idx, ring->cur_idx); + ring->dirty_idx += delta; + + return workdone; +} + +static void rtase_rx_desc_init(struct rtase_private *tp, u16 idx) +{ + struct rtase_ring *ring = &tp->rx_ring[idx]; + u16 i; + + memset(ring->desc, 0x0, RTASE_RX_RING_DESC_SIZE); + memset(ring->data_buf, 0x0, sizeof(ring->data_buf)); + ring->cur_idx = 0; + ring->dirty_idx = 0; + ring->index = idx; + ring->alloc_fail = 0; + + for (i = 0; i < RTASE_NUM_DESC; i++) + ring->mis.data_phy_addr[i] = 0; + + ring->ring_handler = rx_handler; + ring->ivec = &tp->int_vector[idx]; + list_add_tail(&ring->ring_entry, &tp->int_vector[idx].ring_list); +} + +static void rtase_rx_clear(struct rtase_private *tp) +{ + u32 i; + + for (i = 0; i < tp->func_rx_queue_num; i++) + rtase_rx_ring_clear(tp->page_pool, &tp->rx_ring[i]); + + page_pool_destroy(tp->page_pool); + tp->page_pool = NULL; +} + +static int rtase_init_ring(const struct net_device *dev) +{ + struct rtase_private *tp = netdev_priv(dev); + struct page_pool_params pp_params = { 0 }; + struct page_pool *page_pool; + u32 num; + u16 i; + + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.order = 0; + pp_params.pool_size = RTASE_NUM_DESC * tp->func_rx_queue_num; + pp_params.nid = dev_to_node(&tp->pdev->dev); + pp_params.dev = &tp->pdev->dev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pp_params.max_len = PAGE_SIZE; + pp_params.offset = 0; + + page_pool = page_pool_create(&pp_params); + if (IS_ERR(page_pool)) { + netdev_err(tp->dev, "failed to create page pool\n"); + return -ENOMEM; + } + + tp->page_pool = page_pool; + + for (i = 0; i < tp->func_tx_queue_num; i++) + rtase_tx_desc_init(tp, i); + + for (i = 0; i < tp->func_rx_queue_num; i++) { + rtase_rx_desc_init(tp, i); + + num = rtase_rx_ring_fill(&tp->rx_ring[i], 0, RTASE_NUM_DESC); + if (num != RTASE_NUM_DESC) + goto err_out; + + rtase_mark_as_last_descriptor(tp->rx_ring[i].desc + + sizeof(union rtase_rx_desc) * + (RTASE_NUM_DESC - 1)); + } + + return 0; + +err_out: + rtase_rx_clear(tp); + return -ENOMEM; +} + +static void rtase_interrupt_mitigation(const struct rtase_private *tp) +{ + u32 i; + + for (i = 0; i < tp->func_tx_queue_num; i++) + rtase_w16(tp, RTASE_INT_MITI_TX + i * 2, tp->tx_int_mit); + + for (i = 0; i < tp->func_rx_queue_num; i++) + rtase_w16(tp, RTASE_INT_MITI_RX + i * 2, tp->rx_int_mit); +} + +static void rtase_tally_counter_addr_fill(const struct rtase_private *tp) +{ + rtase_w32(tp, RTASE_DTCCR4, upper_32_bits(tp->tally_paddr)); + rtase_w32(tp, 
RTASE_DTCCR0, lower_32_bits(tp->tally_paddr)); +} + +static void rtase_tally_counter_clear(const struct rtase_private *tp) +{ + u32 cmd = lower_32_bits(tp->tally_paddr); + + rtase_w32(tp, RTASE_DTCCR4, upper_32_bits(tp->tally_paddr)); + rtase_w32(tp, RTASE_DTCCR0, cmd | RTASE_COUNTER_RESET); +} + +static void rtase_desc_addr_fill(const struct rtase_private *tp) +{ + const struct rtase_ring *ring; + u16 i, cmd, val; + int err; + + for (i = 0; i < tp->func_tx_queue_num; i++) { + ring = &tp->tx_ring[i]; + + rtase_w32(tp, RTASE_TX_DESC_ADDR0, + lower_32_bits(ring->phy_addr)); + rtase_w32(tp, RTASE_TX_DESC_ADDR4, + upper_32_bits(ring->phy_addr)); + + cmd = i | RTASE_TX_DESC_CMD_WE | RTASE_TX_DESC_CMD_CS; + rtase_w16(tp, RTASE_TX_DESC_COMMAND, cmd); + + err = read_poll_timeout(rtase_r16, val, + !(val & RTASE_TX_DESC_CMD_CS), 10, + 1000, false, tp, + RTASE_TX_DESC_COMMAND); + + if (err == -ETIMEDOUT) + netdev_err(tp->dev, + "error occurred in fill tx descriptor\n"); + } + + for (i = 0; i < tp->func_rx_queue_num; i++) { + ring = &tp->rx_ring[i]; + + if (i == 0) { + rtase_w32(tp, RTASE_Q0_RX_DESC_ADDR0, + lower_32_bits(ring->phy_addr)); + rtase_w32(tp, RTASE_Q0_RX_DESC_ADDR4, + upper_32_bits(ring->phy_addr)); + } else { + rtase_w32(tp, (RTASE_Q1_RX_DESC_ADDR0 + ((i - 1) * 8)), + lower_32_bits(ring->phy_addr)); + rtase_w32(tp, (RTASE_Q1_RX_DESC_ADDR4 + ((i - 1) * 8)), + upper_32_bits(ring->phy_addr)); + } + } +} + +static void rtase_hw_set_features(const struct net_device *dev, + netdev_features_t features) +{ + const struct rtase_private *tp = netdev_priv(dev); + u16 rx_config, val; + + rx_config = rtase_r16(tp, RTASE_RX_CONFIG_0); + if (features & NETIF_F_RXALL) + rx_config |= (RTASE_ACCEPT_ERR | RTASE_ACCEPT_RUNT); + else + rx_config &= ~(RTASE_ACCEPT_ERR | RTASE_ACCEPT_RUNT); + + rtase_w16(tp, RTASE_RX_CONFIG_0, rx_config); + + val = rtase_r16(tp, RTASE_CPLUS_CMD); + if (features & NETIF_F_RXCSUM) + rtase_w16(tp, RTASE_CPLUS_CMD, val | RTASE_RX_CHKSUM); + else + rtase_w16(tp, RTASE_CPLUS_CMD, val & ~RTASE_RX_CHKSUM); + + rx_config = rtase_r16(tp, RTASE_RX_CONFIG_1); + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + rx_config |= (RTASE_INNER_VLAN_DETAG_EN | + RTASE_OUTER_VLAN_DETAG_EN); + else + rx_config &= ~(RTASE_INNER_VLAN_DETAG_EN | + RTASE_OUTER_VLAN_DETAG_EN); + + rtase_w16(tp, RTASE_RX_CONFIG_1, rx_config); +} + +static void rtase_hw_set_rx_packet_filter(struct net_device *dev) +{ + u32 mc_filter[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; + struct rtase_private *tp = netdev_priv(dev); + u16 rx_mode; + + rx_mode = rtase_r16(tp, RTASE_RX_CONFIG_0) & ~RTASE_ACCEPT_MASK; + rx_mode |= RTASE_ACCEPT_BROADCAST | RTASE_ACCEPT_MYPHYS; + + if (dev->flags & IFF_PROMISC) { + rx_mode |= RTASE_ACCEPT_MULTICAST | RTASE_ACCEPT_ALLPHYS; + } else if (dev->flags & IFF_ALLMULTI) { + rx_mode |= RTASE_ACCEPT_MULTICAST; + } else { + struct netdev_hw_addr *hw_addr; + + mc_filter[0] = 0; + mc_filter[1] = 0; + + netdev_for_each_mc_addr(hw_addr, dev) { + u32 bit_nr = eth_hw_addr_crc(hw_addr); + u32 idx = u32_get_bits(bit_nr, BIT(31)); + u32 bit = u32_get_bits(bit_nr, + RTASE_MULTICAST_FILTER_MASK); + + mc_filter[idx] |= BIT(bit); + rx_mode |= RTASE_ACCEPT_MULTICAST; + } + } + + if (dev->features & NETIF_F_RXALL) + rx_mode |= RTASE_ACCEPT_ERR | RTASE_ACCEPT_RUNT; + + rtase_w32(tp, RTASE_MAR0, swab32(mc_filter[1])); + rtase_w32(tp, RTASE_MAR1, swab32(mc_filter[0])); + rtase_w16(tp, RTASE_RX_CONFIG_0, rx_mode); +} + +static void rtase_irq_dis_and_clear(const struct rtase_private *tp) +{ + const struct rtase_int_vector *ivec = 
&tp->int_vector[0];
+ u32 val1;
+ u16 val2;
+ u8 i;
+
+ rtase_w32(tp, ivec->imr_addr, 0);
+ val1 = rtase_r32(tp, ivec->isr_addr);
+ rtase_w32(tp, ivec->isr_addr, val1);
+
+ for (i = 1; i < tp->int_nums; i++) {
+ ivec = &tp->int_vector[i];
+ rtase_w16(tp, ivec->imr_addr, 0);
+ val2 = rtase_r16(tp, ivec->isr_addr);
+ rtase_w16(tp, ivec->isr_addr, val2);
+ }
+}
+
+static void rtase_poll_timeout(const struct rtase_private *tp, u32 cond,
+ u32 sleep_us, u64 timeout_us, u16 reg)
+{
+ int err;
+ u8 val;
+
+ err = read_poll_timeout(rtase_r8, val, val & cond, sleep_us,
+ timeout_us, false, tp, reg);
+
+ if (err == -ETIMEDOUT)
+ netdev_err(tp->dev, "poll reg 0x%04x timeout\n", reg);
+}
+
+static void rtase_nic_reset(const struct net_device *dev)
+{
+ const struct rtase_private *tp = netdev_priv(dev);
+ u16 rx_config;
+ u8 val;
+
+ rx_config = rtase_r16(tp, RTASE_RX_CONFIG_0);
+ rtase_w16(tp, RTASE_RX_CONFIG_0, rx_config & ~RTASE_ACCEPT_MASK);
+
+ val = rtase_r8(tp, RTASE_MISC);
+ rtase_w8(tp, RTASE_MISC, val | RTASE_RX_DV_GATE_EN);
+
+ val = rtase_r8(tp, RTASE_CHIP_CMD);
+ rtase_w8(tp, RTASE_CHIP_CMD, val | RTASE_STOP_REQ);
+ mdelay(2);
+
+ rtase_poll_timeout(tp, RTASE_STOP_REQ_DONE, 100, 150000,
+ RTASE_CHIP_CMD);
+
+ rtase_poll_timeout(tp, RTASE_TX_FIFO_EMPTY, 100, 100000,
+ RTASE_FIFOR);
+
+ rtase_poll_timeout(tp, RTASE_RX_FIFO_EMPTY, 100, 100000,
+ RTASE_FIFOR);
+
+ val = rtase_r8(tp, RTASE_CHIP_CMD);
+ rtase_w8(tp, RTASE_CHIP_CMD, val & ~(RTASE_TE | RTASE_RE));
+ val = rtase_r8(tp, RTASE_CHIP_CMD);
+ rtase_w8(tp, RTASE_CHIP_CMD, val & ~RTASE_STOP_REQ);
+
+ rtase_w16(tp, RTASE_RX_CONFIG_0, rx_config);
+}
+
+static void rtase_hw_reset(const struct net_device *dev)
+{
+ const struct rtase_private *tp = netdev_priv(dev);
+
+ rtase_irq_dis_and_clear(tp);
+
+ rtase_nic_reset(dev);
+}
+
+static void rtase_set_rx_queue(const struct rtase_private *tp)
+{
+ u16 reg_data;
+
+ reg_data = rtase_r16(tp, RTASE_FCR);
+ switch (tp->func_rx_queue_num) {
+ case 1:
+ u16p_replace_bits(&reg_data, 0x1, RTASE_FCR_RXQ_MASK);
+ break;
+ case 2:
+ u16p_replace_bits(&reg_data, 0x2, RTASE_FCR_RXQ_MASK);
+ break;
+ case 4:
+ u16p_replace_bits(&reg_data, 0x3, RTASE_FCR_RXQ_MASK);
+ break;
+ }
+ rtase_w16(tp, RTASE_FCR, reg_data);
+}
+
+static void rtase_set_tx_queue(const struct rtase_private *tp)
+{
+ u16 reg_data;
+
+ reg_data = rtase_r16(tp, RTASE_TX_CONFIG_1);
+ switch (tp->tx_queue_ctrl) {
+ case 1:
+ u16p_replace_bits(&reg_data, 0x0, RTASE_TC_MODE_MASK);
+ break;
+ case 2:
+ u16p_replace_bits(&reg_data, 0x1, RTASE_TC_MODE_MASK);
+ break;
+ case 3:
+ case 4:
+ u16p_replace_bits(&reg_data, 0x2, RTASE_TC_MODE_MASK);
+ break;
+ default:
+ u16p_replace_bits(&reg_data, 0x3, RTASE_TC_MODE_MASK);
+ break;
+ }
+ rtase_w16(tp, RTASE_TX_CONFIG_1, reg_data);
+}
+
+static void rtase_hw_config(struct net_device *dev)
+{
+ const struct rtase_private *tp = netdev_priv(dev);
+ u32 reg_data32;
+ u16 reg_data16;
+
+ rtase_hw_reset(dev);
+
+ /* set rx dma burst */
+ reg_data16 = rtase_r16(tp, RTASE_RX_CONFIG_0);
+ reg_data16 &= ~(RTASE_RX_SINGLE_TAG | RTASE_RX_SINGLE_FETCH);
+ u16p_replace_bits(&reg_data16, RTASE_RX_DMA_BURST_256,
+ RTASE_RX_MX_DMA_MASK);
+ rtase_w16(tp, RTASE_RX_CONFIG_0, reg_data16);
+
+ /* new rx descriptor */
+ reg_data16 = rtase_r16(tp, RTASE_RX_CONFIG_1);
+ reg_data16 |= RTASE_RX_NEW_DESC_FORMAT_EN | RTASE_PCIE_NEW_FLOW;
+ u16p_replace_bits(&reg_data16, 0xF, RTASE_RX_MAX_FETCH_DESC_MASK);
+ rtase_w16(tp, RTASE_RX_CONFIG_1, reg_data16);
+
+ rtase_set_rx_queue(tp);
+
+ rtase_interrupt_mitigation(tp);
+
+ /* set tx dma burst size and interframe
gap time */
+ reg_data32 = rtase_r32(tp, RTASE_TX_CONFIG_0);
+ u32p_replace_bits(&reg_data32, RTASE_TX_DMA_BURST_UNLIMITED,
+ RTASE_TX_DMA_MASK);
+ u32p_replace_bits(&reg_data32, RTASE_INTERFRAMEGAP,
+ RTASE_TX_INTER_FRAME_GAP_MASK);
+ rtase_w32(tp, RTASE_TX_CONFIG_0, reg_data32);
+
+ /* new tx descriptor */
+ reg_data16 = rtase_r16(tp, RTASE_TFUN_CTRL);
+ rtase_w16(tp, RTASE_TFUN_CTRL, reg_data16 |
+ RTASE_TX_NEW_DESC_FORMAT_EN);
+
+ /* tx fetch desc number */
+ rtase_w8(tp, RTASE_TDFNR, 0x10);
+
+ /* tag num select */
+ reg_data16 = rtase_r16(tp, RTASE_MTPS);
+ u16p_replace_bits(&reg_data16, 0x4, RTASE_TAG_NUM_SEL_MASK);
+ rtase_w16(tp, RTASE_MTPS, reg_data16);
+
+ rtase_set_tx_queue(tp);
+
+ rtase_w16(tp, RTASE_TOKSEL, 0x5555);
+
+ rtase_tally_counter_addr_fill(tp);
+ rtase_desc_addr_fill(tp);
+ rtase_hw_set_features(dev, dev->features);
+
+ /* enable flow control */
+ reg_data16 = rtase_r16(tp, RTASE_CPLUS_CMD);
+ reg_data16 |= (RTASE_FORCE_TXFLOW_EN | RTASE_FORCE_RXFLOW_EN);
+ rtase_w16(tp, RTASE_CPLUS_CMD, reg_data16);
+ /* set rx fifo near-full threshold to mitigate rx missed frames */
+ rtase_w16(tp, RTASE_RFIFONFULL, 0x190);
+
+ rtase_w16(tp, RTASE_RMS, tp->rx_buf_sz);
+
+ rtase_hw_set_rx_packet_filter(dev);
+}
+
+static void rtase_nic_enable(const struct net_device *dev)
+{
+ const struct rtase_private *tp = netdev_priv(dev);
+ u16 rcr = rtase_r16(tp, RTASE_RX_CONFIG_1);
+ u8 val;
+
+ rtase_w16(tp, RTASE_RX_CONFIG_1, rcr & ~RTASE_PCIE_RELOAD_EN);
+ rtase_w16(tp, RTASE_RX_CONFIG_1, rcr | RTASE_PCIE_RELOAD_EN);
+
+ val = rtase_r8(tp, RTASE_CHIP_CMD);
+ rtase_w8(tp, RTASE_CHIP_CMD, val | RTASE_TE | RTASE_RE);
+
+ val = rtase_r8(tp, RTASE_MISC);
+ rtase_w8(tp, RTASE_MISC, val & ~RTASE_RX_DV_GATE_EN);
+}
+
+static void rtase_enable_hw_interrupt(const struct rtase_private *tp)
+{
+ const struct rtase_int_vector *ivec = &tp->int_vector[0];
+ u32 i;
+
+ rtase_w32(tp, ivec->imr_addr, ivec->imr);
+
+ for (i = 1; i < tp->int_nums; i++) {
+ ivec = &tp->int_vector[i];
+ rtase_w16(tp, ivec->imr_addr, ivec->imr);
+ }
+}
+
+static void rtase_hw_start(const struct net_device *dev)
+{
+ const struct rtase_private *tp = netdev_priv(dev);
+
+ rtase_nic_enable(dev);
+ rtase_enable_hw_interrupt(tp);
+}
+
+/* this interrupt handler services RXQ0, TXQ0 and TXQ4~7 interrupt status
+ */
+static irqreturn_t rtase_interrupt(int irq, void *dev_instance)
+{
+ const struct rtase_private *tp;
+ struct rtase_int_vector *ivec;
+ u32 status;
+
+ ivec = dev_instance;
+ tp = ivec->tp;
+ status = rtase_r32(tp, ivec->isr_addr);
+
+ rtase_w32(tp, ivec->imr_addr, 0x0);
+ rtase_w32(tp, ivec->isr_addr, status & ~RTASE_FOVW);
+
+ if (napi_schedule_prep(&ivec->napi))
+ __napi_schedule(&ivec->napi);
+
+ return IRQ_HANDLED;
+}
+
+/* this interrupt handler services RXQ1&TXQ1, RXQ2&TXQ2 or RXQ3&TXQ3
+ * interrupt status, depending on the interrupt vector
+ */
+static irqreturn_t rtase_q_interrupt(int irq, void *dev_instance)
+{
+ const struct rtase_private *tp;
+ struct rtase_int_vector *ivec;
+ u16 status;
+
+ ivec = dev_instance;
+ tp = ivec->tp;
+ status = rtase_r16(tp, ivec->isr_addr);
+
+ rtase_w16(tp, ivec->imr_addr, 0x0);
+ rtase_w16(tp, ivec->isr_addr, status);
+
+ if (napi_schedule_prep(&ivec->napi))
+ __napi_schedule(&ivec->napi);
+
+ return IRQ_HANDLED;
+}
+
+static int rtase_poll(struct napi_struct *napi, int budget)
+{
+ const struct rtase_int_vector *ivec;
+ const struct rtase_private *tp;
+ struct rtase_ring *ring;
+ int total_workdone = 0;
+
+ ivec = container_of(napi, struct rtase_int_vector, napi);
+ tp = ivec->tp;
+
+ list_for_each_entry(ring,
&ivec->ring_list, ring_entry) + total_workdone += ring->ring_handler(ring, budget); + + if (total_workdone >= budget) + return budget; + + if (napi_complete_done(napi, total_workdone)) { + if (!ivec->index) + rtase_w32(tp, ivec->imr_addr, ivec->imr); + else + rtase_w16(tp, ivec->imr_addr, ivec->imr); + } + + return total_workdone; +} + +static int rtase_open(struct net_device *dev) +{ + struct rtase_private *tp = netdev_priv(dev); + const struct pci_dev *pdev = tp->pdev; + struct rtase_int_vector *ivec; + u16 i = 0, j; + int ret; + + ivec = &tp->int_vector[0]; + tp->rx_buf_sz = RTASE_RX_BUF_SIZE; + + ret = rtase_alloc_desc(tp); + if (ret) + return ret; + + ret = rtase_init_ring(dev); + if (ret) + goto err_free_all_allocated_mem; + + rtase_hw_config(dev); + + if (tp->sw_flag & RTASE_SWF_MSIX_ENABLED) { + ret = request_irq(ivec->irq, rtase_interrupt, 0, + dev->name, ivec); + if (ret) + goto err_free_all_allocated_irq; + + /* request other interrupts to handle multiqueue */ + for (i = 1; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + snprintf(ivec->name, sizeof(ivec->name), "%s_int%i", + tp->dev->name, i); + ret = request_irq(ivec->irq, rtase_q_interrupt, 0, + ivec->name, ivec); + if (ret) + goto err_free_all_allocated_irq; + } + } else { + ret = request_irq(pdev->irq, rtase_interrupt, 0, dev->name, + ivec); + if (ret) + goto err_free_all_allocated_mem; + } + + rtase_hw_start(dev); + + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + napi_enable(&ivec->napi); + } + + netif_carrier_on(dev); + netif_wake_queue(dev); + + return 0; + +err_free_all_allocated_irq: + for (j = 0; j < i; j++) + free_irq(tp->int_vector[j].irq, &tp->int_vector[j]); + +err_free_all_allocated_mem: + rtase_free_desc(tp); + + return ret; +} + +static void rtase_down(struct net_device *dev) +{ + struct rtase_private *tp = netdev_priv(dev); + struct rtase_int_vector *ivec; + struct rtase_ring *ring, *tmp; + u32 i; + + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + napi_disable(&ivec->napi); + list_for_each_entry_safe(ring, tmp, &ivec->ring_list, + ring_entry) + list_del(&ring->ring_entry); + } + + netif_tx_disable(dev); + + netif_carrier_off(dev); + + rtase_hw_reset(dev); + + rtase_tx_clear(tp); + + rtase_rx_clear(tp); +} + +static int rtase_close(struct net_device *dev) +{ + struct rtase_private *tp = netdev_priv(dev); + const struct pci_dev *pdev = tp->pdev; + u32 i; + + rtase_down(dev); + + if (tp->sw_flag & RTASE_SWF_MSIX_ENABLED) { + for (i = 0; i < tp->int_nums; i++) + free_irq(tp->int_vector[i].irq, &tp->int_vector[i]); + + } else { + free_irq(pdev->irq, &tp->int_vector[0]); + } + + rtase_free_desc(tp); + + return 0; +} + +static u32 rtase_tx_vlan_tag(const struct rtase_private *tp, + const struct sk_buff *skb) +{ + return (skb_vlan_tag_present(skb)) ? 
+ (RTASE_TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb))) : 0x00; +} + +static u32 rtase_tx_csum(struct sk_buff *skb, const struct net_device *dev) +{ + u32 csum_cmd = 0; + u8 ip_protocol; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + csum_cmd = RTASE_TX_IPCS_C; + ip_protocol = ip_hdr(skb)->protocol; + break; + + case htons(ETH_P_IPV6): + csum_cmd = RTASE_TX_IPV6F_C; + ip_protocol = ipv6_hdr(skb)->nexthdr; + break; + + default: + ip_protocol = IPPROTO_RAW; + break; + } + + if (ip_protocol == IPPROTO_TCP) + csum_cmd |= RTASE_TX_TCPCS_C; + else if (ip_protocol == IPPROTO_UDP) + csum_cmd |= RTASE_TX_UDPCS_C; + + csum_cmd |= u32_encode_bits(skb_transport_offset(skb), + RTASE_TCPHO_MASK); + + return csum_cmd; +} + +static int rtase_xmit_frags(struct rtase_ring *ring, struct sk_buff *skb, + u32 opts1, u32 opts2) +{ + const struct skb_shared_info *info = skb_shinfo(skb); + const struct rtase_private *tp = ring->ivec->tp; + const u8 nr_frags = info->nr_frags; + struct rtase_tx_desc *txd = NULL; + u32 cur_frag, entry; + + entry = ring->cur_idx; + for (cur_frag = 0; cur_frag < nr_frags; cur_frag++) { + const skb_frag_t *frag = &info->frags[cur_frag]; + dma_addr_t mapping; + u32 status, len; + void *addr; + + entry = (entry + 1) % RTASE_NUM_DESC; + + txd = ring->desc + sizeof(struct rtase_tx_desc) * entry; + len = skb_frag_size(frag); + addr = skb_frag_address(frag); + mapping = dma_map_single(&tp->pdev->dev, addr, len, + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) { + if (unlikely(net_ratelimit())) + netdev_err(tp->dev, + "Failed to map TX fragments DMA!\n"); + + goto err_out; + } + + if (((entry + 1) % RTASE_NUM_DESC) == 0) + status = (opts1 | len | RTASE_RING_END); + else + status = opts1 | len; + + if (cur_frag == (nr_frags - 1)) { + ring->skbuff[entry] = skb; + status |= RTASE_TX_LAST_FRAG; + } + + ring->mis.len[entry] = len; + txd->addr = cpu_to_le64(mapping); + txd->opts2 = cpu_to_le32(opts2); + + /* make sure the operating fields have been updated */ + dma_wmb(); + txd->opts1 = cpu_to_le32(status); + } + + return cur_frag; + +err_out: + rtase_tx_clear_range(ring, ring->cur_idx + 1, cur_frag); + return -EIO; +} + +static netdev_tx_t rtase_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + struct rtase_private *tp = netdev_priv(dev); + u32 q_idx, entry, len, opts1, opts2; + struct netdev_queue *tx_queue; + bool stop_queue, door_bell; + u32 mss = shinfo->gso_size; + struct rtase_tx_desc *txd; + struct rtase_ring *ring; + dma_addr_t mapping; + int frags; + + /* multiqueues */ + q_idx = skb_get_queue_mapping(skb); + ring = &tp->tx_ring[q_idx]; + tx_queue = netdev_get_tx_queue(dev, q_idx); + + if (unlikely(!rtase_tx_avail(ring))) { + if (net_ratelimit()) + netdev_err(dev, + "BUG! 
Tx Ring full when queue awake!\n"); + + netif_stop_queue(dev); + return NETDEV_TX_BUSY; + } + + entry = ring->cur_idx % RTASE_NUM_DESC; + txd = ring->desc + sizeof(struct rtase_tx_desc) * entry; + + opts1 = RTASE_DESC_OWN; + opts2 = rtase_tx_vlan_tag(tp, skb); + + /* tcp segmentation offload (or tcp large send) */ + if (mss) { + if (shinfo->gso_type & SKB_GSO_TCPV4) { + opts1 |= RTASE_GIANT_SEND_V4; + } else if (shinfo->gso_type & SKB_GSO_TCPV6) { + if (skb_cow_head(skb, 0)) + goto err_dma_0; + + tcp_v6_gso_csum_prep(skb); + opts1 |= RTASE_GIANT_SEND_V6; + } else { + WARN_ON_ONCE(1); + } + + opts1 |= u32_encode_bits(skb_transport_offset(skb), + RTASE_TCPHO_MASK); + opts2 |= u32_encode_bits(mss, RTASE_MSS_MASK); + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + opts2 |= rtase_tx_csum(skb, dev); + } + + frags = rtase_xmit_frags(ring, skb, opts1, opts2); + if (unlikely(frags < 0)) + goto err_dma_0; + + if (frags) { + len = skb_headlen(skb); + opts1 |= RTASE_TX_FIRST_FRAG; + } else { + len = skb->len; + ring->skbuff[entry] = skb; + opts1 |= RTASE_TX_FIRST_FRAG | RTASE_TX_LAST_FRAG; + } + + if (((entry + 1) % RTASE_NUM_DESC) == 0) + opts1 |= (len | RTASE_RING_END); + else + opts1 |= len; + + mapping = dma_map_single(&tp->pdev->dev, skb->data, len, + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) { + if (unlikely(net_ratelimit())) + netdev_err(dev, "Failed to map TX DMA!\n"); + + goto err_dma_1; + } + + ring->mis.len[entry] = len; + txd->addr = cpu_to_le64(mapping); + txd->opts2 = cpu_to_le32(opts2); + txd->opts1 = cpu_to_le32(opts1 & ~RTASE_DESC_OWN); + + /* make sure the operating fields have been updated */ + dma_wmb(); + + door_bell = __netdev_tx_sent_queue(tx_queue, skb->len, + netdev_xmit_more()); + + txd->opts1 = cpu_to_le32(opts1); + + skb_tx_timestamp(skb); + + /* tx needs to see descriptor changes before updated cur_idx */ + smp_wmb(); + + WRITE_ONCE(ring->cur_idx, ring->cur_idx + frags + 1); + + stop_queue = !netif_subqueue_maybe_stop(dev, ring->index, + rtase_tx_avail(ring), + RTASE_TX_STOP_THRS, + RTASE_TX_START_THRS); + + if (door_bell || stop_queue) + rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index)); + + return NETDEV_TX_OK; + +err_dma_1: + ring->skbuff[entry] = NULL; + rtase_tx_clear_range(ring, ring->cur_idx + 1, frags); + +err_dma_0: + tp->stats.tx_dropped++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void rtase_set_rx_mode(struct net_device *dev) +{ + rtase_hw_set_rx_packet_filter(dev); +} + +static void rtase_enable_eem_write(const struct rtase_private *tp) +{ + u8 val; + + val = rtase_r8(tp, RTASE_EEM); + rtase_w8(tp, RTASE_EEM, val | RTASE_EEM_UNLOCK); +} + +static void rtase_disable_eem_write(const struct rtase_private *tp) +{ + u8 val; + + val = rtase_r8(tp, RTASE_EEM); + rtase_w8(tp, RTASE_EEM, val & ~RTASE_EEM_UNLOCK); +} + +static void rtase_rar_set(const struct rtase_private *tp, const u8 *addr) +{ + u32 rar_low, rar_high; + + rar_low = (u32)addr[0] | ((u32)addr[1] << 8) | + ((u32)addr[2] << 16) | ((u32)addr[3] << 24); + + rar_high = (u32)addr[4] | ((u32)addr[5] << 8); + + rtase_enable_eem_write(tp); + rtase_w32(tp, RTASE_MAC0, rar_low); + rtase_w32(tp, RTASE_MAC4, rar_high); + rtase_disable_eem_write(tp); + rtase_w16(tp, RTASE_LBK_CTRL, RTASE_LBK_ATLD | RTASE_LBK_CLR); +} + +static int rtase_set_mac_address(struct net_device *dev, void *p) +{ + struct rtase_private *tp = netdev_priv(dev); + int ret; + + ret = eth_mac_addr(dev, p); + if (ret) + return ret; + + rtase_rar_set(tp, dev->dev_addr); + + return 0; +} + +static 
int rtase_change_mtu(struct net_device *dev, int new_mtu) +{ + dev->mtu = new_mtu; + + netdev_update_features(dev); + + return 0; +} + +static void rtase_wait_for_quiescence(const struct net_device *dev) +{ + struct rtase_private *tp = netdev_priv(dev); + struct rtase_int_vector *ivec; + u32 i; + + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + synchronize_irq(ivec->irq); + /* wait for any pending NAPI task to complete */ + napi_disable(&ivec->napi); + } + + rtase_irq_dis_and_clear(tp); + + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + napi_enable(&ivec->napi); + } +} + +static void rtase_sw_reset(struct net_device *dev) +{ + struct rtase_private *tp = netdev_priv(dev); + int ret; + + netif_stop_queue(dev); + netif_carrier_off(dev); + rtase_hw_reset(dev); + + /* let's wait a bit while any (async) irq lands on */ + rtase_wait_for_quiescence(dev); + rtase_tx_clear(tp); + rtase_rx_clear(tp); + + ret = rtase_init_ring(dev); + if (ret) { + netdev_err(dev, "unable to init ring\n"); + rtase_free_desc(tp); + return; + } + + rtase_hw_config(dev); + /* always link, so start to transmit & receive */ + rtase_hw_start(dev); + + netif_carrier_on(dev); + netif_wake_queue(dev); +} + +static void rtase_dump_tally_counter(const struct rtase_private *tp) +{ + dma_addr_t paddr = tp->tally_paddr; + u32 cmd = lower_32_bits(paddr); + u32 val; + int err; + + rtase_w32(tp, RTASE_DTCCR4, upper_32_bits(paddr)); + rtase_w32(tp, RTASE_DTCCR0, cmd); + rtase_w32(tp, RTASE_DTCCR0, cmd | RTASE_COUNTER_DUMP); + + err = read_poll_timeout(rtase_r32, val, !(val & RTASE_COUNTER_DUMP), + 10, 250, false, tp, RTASE_DTCCR0); + + if (err == -ETIMEDOUT) + netdev_err(tp->dev, "error occurred in dump tally counter\n"); +} + +static void rtase_dump_state(const struct net_device *dev) +{ + const struct rtase_private *tp = netdev_priv(dev); + int max_reg_size = RTASE_PCI_REGS_SIZE; + const struct rtase_counters *counters; + const struct rtase_ring *ring; + u32 dword_rd; + int n = 0; + + ring = &tp->tx_ring[0]; + netdev_err(dev, "Tx descriptor info:\n"); + netdev_err(dev, "Tx curIdx = 0x%x\n", ring->cur_idx); + netdev_err(dev, "Tx dirtyIdx = 0x%x\n", ring->dirty_idx); + netdev_err(dev, "Tx phyAddr = %pad\n", &ring->phy_addr); + + ring = &tp->rx_ring[0]; + netdev_err(dev, "Rx descriptor info:\n"); + netdev_err(dev, "Rx curIdx = 0x%x\n", ring->cur_idx); + netdev_err(dev, "Rx dirtyIdx = 0x%x\n", ring->dirty_idx); + netdev_err(dev, "Rx phyAddr = %pad\n", &ring->phy_addr); + + netdev_err(dev, "Device Registers:\n"); + netdev_err(dev, "Chip Command = 0x%02x\n", + rtase_r8(tp, RTASE_CHIP_CMD)); + netdev_err(dev, "IMR = %08x\n", rtase_r32(tp, RTASE_IMR0)); + netdev_err(dev, "ISR = %08x\n", rtase_r32(tp, RTASE_ISR0)); + netdev_err(dev, "Boot Ctrl Reg(0xE004) = %04x\n", + rtase_r16(tp, RTASE_BOOT_CTL)); + netdev_err(dev, "EPHY ISR(0xE014) = %04x\n", + rtase_r16(tp, RTASE_EPHY_ISR)); + netdev_err(dev, "EPHY IMR(0xE016) = %04x\n", + rtase_r16(tp, RTASE_EPHY_IMR)); + netdev_err(dev, "CLKSW SET REG(0xE018) = %04x\n", + rtase_r16(tp, RTASE_CLKSW_SET)); + + netdev_err(dev, "Dump PCI Registers:\n"); + + while (n < max_reg_size) { + if ((n % RTASE_DWORD_MOD) == 0) + netdev_err(tp->dev, "0x%03x:\n", n); + + pci_read_config_dword(tp->pdev, n, &dword_rd); + netdev_err(tp->dev, "%08x\n", dword_rd); + n += 4; + } + + netdev_err(dev, "Dump tally counter:\n"); + counters = tp->tally_vaddr; + rtase_dump_tally_counter(tp); + + netdev_err(dev, "tx_packets %lld\n", + le64_to_cpu(counters->tx_packets)); + netdev_err(dev, 
"rx_packets %lld\n", + le64_to_cpu(counters->rx_packets)); + netdev_err(dev, "tx_errors %lld\n", + le64_to_cpu(counters->tx_errors)); + netdev_err(dev, "rx_errors %d\n", + le32_to_cpu(counters->rx_errors)); + netdev_err(dev, "rx_missed %d\n", + le16_to_cpu(counters->rx_missed)); + netdev_err(dev, "align_errors %d\n", + le16_to_cpu(counters->align_errors)); + netdev_err(dev, "tx_one_collision %d\n", + le32_to_cpu(counters->tx_one_collision)); + netdev_err(dev, "tx_multi_collision %d\n", + le32_to_cpu(counters->tx_multi_collision)); + netdev_err(dev, "rx_unicast %lld\n", + le64_to_cpu(counters->rx_unicast)); + netdev_err(dev, "rx_broadcast %lld\n", + le64_to_cpu(counters->rx_broadcast)); + netdev_err(dev, "rx_multicast %d\n", + le32_to_cpu(counters->rx_multicast)); + netdev_err(dev, "tx_aborted %d\n", + le16_to_cpu(counters->tx_aborted)); + netdev_err(dev, "tx_underrun %d\n", + le16_to_cpu(counters->tx_underrun)); +} + +static void rtase_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + rtase_dump_state(dev); + rtase_sw_reset(dev); +} + +static void rtase_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + const struct rtase_private *tp = netdev_priv(dev); + const struct rtase_counters *counters; + + counters = tp->tally_vaddr; + + dev_fetch_sw_netstats(stats, dev->tstats); + + /* fetch additional counter values missing in stats collected by driver + * from tally counter + */ + rtase_dump_tally_counter(tp); + stats->rx_errors = tp->stats.rx_errors; + stats->tx_errors = le64_to_cpu(counters->tx_errors); + stats->rx_dropped = tp->stats.rx_dropped; + stats->tx_dropped = tp->stats.tx_dropped; + stats->multicast = tp->stats.multicast; + stats->rx_length_errors = tp->stats.rx_length_errors; +} + +static netdev_features_t rtase_fix_features(struct net_device *dev, + netdev_features_t features) +{ + netdev_features_t features_fix = features; + + /* not support TSO for jumbo frames */ + if (dev->mtu > ETH_DATA_LEN) + features_fix &= ~NETIF_F_ALL_TSO; + + return features_fix; +} + +static int rtase_set_features(struct net_device *dev, + netdev_features_t features) +{ + netdev_features_t features_set = features; + + features_set &= NETIF_F_RXALL | NETIF_F_RXCSUM | + NETIF_F_HW_VLAN_CTAG_RX; + + if (features_set ^ dev->features) + rtase_hw_set_features(dev, features_set); + + return 0; +} + +static const struct net_device_ops rtase_netdev_ops = { + .ndo_open = rtase_open, + .ndo_stop = rtase_close, + .ndo_start_xmit = rtase_start_xmit, + .ndo_set_rx_mode = rtase_set_rx_mode, + .ndo_set_mac_address = rtase_set_mac_address, + .ndo_change_mtu = rtase_change_mtu, + .ndo_tx_timeout = rtase_tx_timeout, + .ndo_get_stats64 = rtase_get_stats64, + .ndo_fix_features = rtase_fix_features, + .ndo_set_features = rtase_set_features, +}; + +static void rtase_get_mac_address(struct net_device *dev) +{ + struct rtase_private *tp = netdev_priv(dev); + u8 mac_addr[ETH_ALEN] __aligned(2) = {}; + u32 i; + + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = rtase_r8(tp, RTASE_MAC0 + i); + + if (!is_valid_ether_addr(mac_addr)) { + eth_hw_addr_random(dev); + netdev_warn(dev, "Random ether addr %pM\n", dev->dev_addr); + } else { + eth_hw_addr_set(dev, mac_addr); + ether_addr_copy(dev->perm_addr, dev->dev_addr); + } + + rtase_rar_set(tp, dev->dev_addr); +} + +static int rtase_get_settings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + u32 supported = SUPPORTED_MII | SUPPORTED_Pause | SUPPORTED_Asym_Pause; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + 
supported); + cmd->base.speed = SPEED_5000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_MII; + cmd->base.autoneg = AUTONEG_DISABLE; + + return 0; +} + +static void rtase_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + const struct rtase_private *tp = netdev_priv(dev); + u16 value = rtase_r16(tp, RTASE_CPLUS_CMD); + + pause->autoneg = AUTONEG_DISABLE; + pause->tx_pause = !!(value & RTASE_FORCE_TXFLOW_EN); + pause->rx_pause = !!(value & RTASE_FORCE_RXFLOW_EN); +} + +static int rtase_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + const struct rtase_private *tp = netdev_priv(dev); + u16 value = rtase_r16(tp, RTASE_CPLUS_CMD); + + if (pause->autoneg) + return -EOPNOTSUPP; + + value &= ~(RTASE_FORCE_TXFLOW_EN | RTASE_FORCE_RXFLOW_EN); + + if (pause->tx_pause) + value |= RTASE_FORCE_TXFLOW_EN; + + if (pause->rx_pause) + value |= RTASE_FORCE_RXFLOW_EN; + + rtase_w16(tp, RTASE_CPLUS_CMD, value); + return 0; +} + +static void rtase_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *stats) +{ + struct rtase_private *tp = netdev_priv(dev); + const struct rtase_counters *counters; + + counters = tp->tally_vaddr; + + rtase_dump_tally_counter(tp); + + stats->FramesTransmittedOK = le64_to_cpu(counters->tx_packets); + stats->FramesReceivedOK = le64_to_cpu(counters->rx_packets); + stats->FramesLostDueToIntMACXmitError = + le64_to_cpu(counters->tx_errors); + stats->BroadcastFramesReceivedOK = le64_to_cpu(counters->rx_broadcast); +} + +static const struct ethtool_ops rtase_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_link_ksettings = rtase_get_settings, + .get_pauseparam = rtase_get_pauseparam, + .set_pauseparam = rtase_set_pauseparam, + .get_eth_mac_stats = rtase_get_eth_mac_stats, + .get_ts_info = ethtool_op_get_ts_info, +}; + +static void rtase_init_netdev_ops(struct net_device *dev) +{ + dev->netdev_ops = &rtase_netdev_ops; + dev->ethtool_ops = &rtase_ethtool_ops; +} + +static void rtase_reset_interrupt(struct pci_dev *pdev, + const struct rtase_private *tp) +{ + if (tp->sw_flag & RTASE_SWF_MSIX_ENABLED) + pci_disable_msix(pdev); + else + pci_disable_msi(pdev); +} + +static int rtase_alloc_msix(struct pci_dev *pdev, struct rtase_private *tp) +{ + int ret, irq; + u16 i; + + memset(tp->msix_entry, 0x0, RTASE_NUM_MSIX * + sizeof(struct msix_entry)); + + for (i = 0; i < RTASE_NUM_MSIX; i++) + tp->msix_entry[i].entry = i; + + ret = pci_enable_msix_exact(pdev, tp->msix_entry, tp->int_nums); + + if (ret) + return ret; + + for (i = 0; i < tp->int_nums; i++) { + irq = pci_irq_vector(pdev, i); + if (!irq) { + pci_disable_msix(pdev); + return irq; + } + + tp->int_vector[i].irq = irq; + } + + return 0; +} + +static int rtase_alloc_interrupt(struct pci_dev *pdev, + struct rtase_private *tp) +{ + int ret; + + ret = rtase_alloc_msix(pdev, tp); + if (ret) { + ret = pci_enable_msi(pdev); + if (ret) { + dev_err(&pdev->dev, + "unable to alloc interrupt.(MSI)\n"); + return ret; + } + + tp->sw_flag |= RTASE_SWF_MSI_ENABLED; + } else { + tp->sw_flag |= RTASE_SWF_MSIX_ENABLED; + } + + return 0; +} + +static void rtase_init_hardware(const struct rtase_private *tp) +{ + u16 i; + + for (i = 0; i < RTASE_VLAN_FILTER_ENTRY_NUM; i++) + rtase_w32(tp, RTASE_VLAN_ENTRY_0 + i * 4, 0); +} + +static void rtase_init_int_vector(struct rtase_private *tp) +{ + u16 i; + + /* interrupt vector 0 */ + tp->int_vector[0].tp = tp; + tp->int_vector[0].index = 0; + tp->int_vector[0].imr_addr = RTASE_IMR0; + tp->int_vector[0].isr_addr = 
RTASE_ISR0; + tp->int_vector[0].imr = RTASE_ROK | RTASE_RDU | RTASE_TOK | + RTASE_TOK4 | RTASE_TOK5 | RTASE_TOK6 | + RTASE_TOK7; + tp->int_vector[0].poll = rtase_poll; + + memset(tp->int_vector[0].name, 0x0, sizeof(tp->int_vector[0].name)); + INIT_LIST_HEAD(&tp->int_vector[0].ring_list); + + netif_napi_add(tp->dev, &tp->int_vector[0].napi, + tp->int_vector[0].poll); + + /* interrupt vector 1 ~ 3 */ + for (i = 1; i < tp->int_nums; i++) { + tp->int_vector[i].tp = tp; + tp->int_vector[i].index = i; + tp->int_vector[i].imr_addr = RTASE_IMR1 + (i - 1) * 4; + tp->int_vector[i].isr_addr = RTASE_ISR1 + (i - 1) * 4; + tp->int_vector[i].imr = RTASE_Q_ROK | RTASE_Q_RDU | + RTASE_Q_TOK; + tp->int_vector[i].poll = rtase_poll; + + memset(tp->int_vector[i].name, 0x0, + sizeof(tp->int_vector[0].name)); + INIT_LIST_HEAD(&tp->int_vector[i].ring_list); + + netif_napi_add(tp->dev, &tp->int_vector[i].napi, + tp->int_vector[i].poll); + } +} + +static u16 rtase_calc_time_mitigation(u32 time_us) +{ + u8 msb, time_count, time_unit; + u16 int_miti; + + time_us = min_t(int, time_us, RTASE_MITI_MAX_TIME); + + msb = fls(time_us); + if (msb >= RTASE_MITI_COUNT_BIT_NUM) { + time_unit = msb - RTASE_MITI_COUNT_BIT_NUM; + time_count = time_us >> (msb - RTASE_MITI_COUNT_BIT_NUM); + } else { + time_unit = 0; + time_count = time_us; + } + + int_miti = u16_encode_bits(time_count, RTASE_MITI_TIME_COUNT_MASK) | + u16_encode_bits(time_unit, RTASE_MITI_TIME_UNIT_MASK); + + return int_miti; +} + +static u16 rtase_calc_packet_num_mitigation(u16 pkt_num) +{ + u8 msb, pkt_num_count, pkt_num_unit; + u16 int_miti; + + pkt_num = min_t(int, pkt_num, RTASE_MITI_MAX_PKT_NUM); + + if (pkt_num > 60) { + pkt_num_unit = RTASE_MITI_MAX_PKT_NUM_IDX; + pkt_num_count = pkt_num / RTASE_MITI_MAX_PKT_NUM_UNIT; + } else { + msb = fls(pkt_num); + if (msb >= RTASE_MITI_COUNT_BIT_NUM) { + pkt_num_unit = msb - RTASE_MITI_COUNT_BIT_NUM; + pkt_num_count = pkt_num >> (msb - + RTASE_MITI_COUNT_BIT_NUM); + } else { + pkt_num_unit = 0; + pkt_num_count = pkt_num; + } + } + + int_miti = u16_encode_bits(pkt_num_count, + RTASE_MITI_PKT_NUM_COUNT_MASK) | + u16_encode_bits(pkt_num_unit, + RTASE_MITI_PKT_NUM_UNIT_MASK); + + return int_miti; +} + +static void rtase_init_software_variable(struct pci_dev *pdev, + struct rtase_private *tp) +{ + u16 int_miti; + + tp->tx_queue_ctrl = RTASE_TXQ_CTRL; + tp->func_tx_queue_num = RTASE_FUNC_TXQ_NUM; + tp->func_rx_queue_num = RTASE_FUNC_RXQ_NUM; + tp->int_nums = RTASE_INTERRUPT_NUM; + + int_miti = rtase_calc_time_mitigation(RTASE_MITI_DEFAULT_TIME) | + rtase_calc_packet_num_mitigation(RTASE_MITI_DEFAULT_PKT_NUM); + tp->tx_int_mit = int_miti; + tp->rx_int_mit = int_miti; + + tp->sw_flag = 0; + + rtase_init_int_vector(tp); + + /* MTU range: 60 - hw-specific max */ + tp->dev->min_mtu = ETH_ZLEN; + tp->dev->max_mtu = RTASE_MAX_JUMBO_SIZE; +} + +static bool rtase_check_mac_version_valid(struct rtase_private *tp) +{ + u32 hw_ver = rtase_r32(tp, RTASE_TX_CONFIG_0) & RTASE_HW_VER_MASK; + bool known_ver = false; + + switch (hw_ver) { + case 0x00800000: + case 0x04000000: + case 0x04800000: + known_ver = true; + break; + } + + return known_ver; +} + +static int rtase_init_board(struct pci_dev *pdev, struct net_device **dev_out, + void __iomem **ioaddr_out) +{ + struct net_device *dev; + void __iomem *ioaddr; + int ret = -ENOMEM; + + /* dev zeroed in alloc_etherdev */ + dev = alloc_etherdev_mq(sizeof(struct rtase_private), + RTASE_FUNC_TXQ_NUM); + if (!dev) + goto err_out; + + SET_NETDEV_DEV(dev, &pdev->dev); + + ret = 
pci_enable_device(pdev); + if (ret < 0) + goto err_out_free_dev; + + /* make sure PCI base addr 1 is MMIO */ + if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { + ret = -ENODEV; + goto err_out_disable; + } + + /* check for weird/broken PCI region reporting */ + if (pci_resource_len(pdev, 2) < RTASE_REGS_SIZE) { + ret = -ENODEV; + goto err_out_disable; + } + + ret = pci_request_regions(pdev, KBUILD_MODNAME); + if (ret < 0) + goto err_out_disable; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "no usable dma addressing method\n"); + goto err_out_free_res; + } + + pci_set_master(pdev); + + /* ioremap MMIO region */ + ioaddr = ioremap(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + if (!ioaddr) { + ret = -EIO; + goto err_out_free_res; + } + + *ioaddr_out = ioaddr; + *dev_out = dev; + + return ret; + +err_out_free_res: + pci_release_regions(pdev); + +err_out_disable: + pci_disable_device(pdev); + +err_out_free_dev: + free_netdev(dev); + +err_out: + *ioaddr_out = NULL; + *dev_out = NULL; + + return ret; +} + +static void rtase_release_board(struct pci_dev *pdev, struct net_device *dev, + void __iomem *ioaddr) +{ + const struct rtase_private *tp = netdev_priv(dev); + + rtase_rar_set(tp, tp->dev->perm_addr); + iounmap(ioaddr); + + if (tp->sw_flag & RTASE_SWF_MSIX_ENABLED) + pci_disable_msix(pdev); + else + pci_disable_msi(pdev); + + pci_release_regions(pdev); + pci_disable_device(pdev); + free_netdev(dev); +} + +static int rtase_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *dev = NULL; + struct rtase_int_vector *ivec; + void __iomem *ioaddr = NULL; + struct rtase_private *tp; + int ret, i; + + if (!pdev->is_physfn && pdev->is_virtfn) { + dev_err(&pdev->dev, + "This module does not support a virtual function."); + return -EINVAL; + } + + dev_dbg(&pdev->dev, "Automotive Switch Ethernet driver loaded\n"); + + ret = rtase_init_board(pdev, &dev, &ioaddr); + if (ret != 0) + return ret; + + tp = netdev_priv(dev); + tp->mmio_addr = ioaddr; + tp->dev = dev; + tp->pdev = pdev; + + /* identify chip attached to board */ + if (!rtase_check_mac_version_valid(tp)) + return dev_err_probe(&pdev->dev, -ENODEV, + "unknown chip version, contact rtase maintainers (see MAINTAINERS file)\n"); + + rtase_init_software_variable(pdev, tp); + rtase_init_hardware(tp); + + ret = rtase_alloc_interrupt(pdev, tp); + if (ret < 0) { + dev_err(&pdev->dev, "unable to alloc MSIX/MSI\n"); + goto err_out_1; + } + + rtase_init_netdev_ops(dev); + + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; + + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_IP_CSUM | NETIF_F_HIGHDMA | + NETIF_F_RXCSUM | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_IPV6_CSUM | + NETIF_F_TSO6; + + dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | + NETIF_F_TSO | NETIF_F_RXCSUM | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_RXALL | NETIF_F_RXFCS | + NETIF_F_IPV6_CSUM | NETIF_F_TSO6; + + dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | + NETIF_F_HIGHDMA; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_set_tso_max_size(dev, RTASE_LSO_64K); + netif_set_tso_max_segs(dev, RTASE_NIC_MAX_PHYS_BUF_COUNT_LSO2); + + rtase_get_mac_address(dev); + + tp->tally_vaddr = dma_alloc_coherent(&pdev->dev, + sizeof(*tp->tally_vaddr), + &tp->tally_paddr, + GFP_KERNEL); + if (!tp->tally_vaddr) { + ret = -ENOMEM; + goto err_out; + } + + rtase_tally_counter_clear(tp); + + pci_set_drvdata(pdev, dev); + + 
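/* (Probe-order note, inferred from the open path above: rtase_open()
 * calls netif_carrier_on() unconditionally, since the GMAC is wired to
 * the switch core with no PHY to negotiate, so carrier simply tracks
 * open/close; forcing it off here only covers the window between
 * register_netdev() below and the first ndo_open.)
 */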
netif_carrier_off(dev); + + ret = register_netdev(dev); + if (ret != 0) + goto err_out; + + netdev_dbg(dev, "%pM, IRQ %d\n", dev->dev_addr, dev->irq); + + return 0; + +err_out: + if (tp->tally_vaddr) { + dma_free_coherent(&pdev->dev, + sizeof(*tp->tally_vaddr), + tp->tally_vaddr, + tp->tally_paddr); + + tp->tally_vaddr = NULL; + } + +err_out_1: + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + netif_napi_del(&ivec->napi); + } + + rtase_release_board(pdev, dev, ioaddr); + + return ret; +} + +static void rtase_remove_one(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct rtase_private *tp = netdev_priv(dev); + struct rtase_int_vector *ivec; + u32 i; + + unregister_netdev(dev); + + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + netif_napi_del(&ivec->napi); + } + + rtase_reset_interrupt(pdev, tp); + if (tp->tally_vaddr) { + dma_free_coherent(&pdev->dev, + sizeof(*tp->tally_vaddr), + tp->tally_vaddr, + tp->tally_paddr); + tp->tally_vaddr = NULL; + } + + rtase_release_board(pdev, dev, tp->mmio_addr); + pci_set_drvdata(pdev, NULL); +} + +static void rtase_shutdown(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + const struct rtase_private *tp; + + tp = netdev_priv(dev); + + if (netif_running(dev)) + rtase_close(dev); + + rtase_reset_interrupt(pdev, tp); +} + +static int rtase_suspend(struct device *device) +{ + struct net_device *dev = dev_get_drvdata(device); + + if (netif_running(dev)) { + netif_device_detach(dev); + rtase_hw_reset(dev); + } + + return 0; +} + +static int rtase_resume(struct device *device) +{ + struct net_device *dev = dev_get_drvdata(device); + struct rtase_private *tp = netdev_priv(dev); + int ret; + + /* restore last modified mac address */ + rtase_rar_set(tp, dev->dev_addr); + + if (!netif_running(dev)) + goto out; + + rtase_wait_for_quiescence(dev); + + rtase_tx_clear(tp); + rtase_rx_clear(tp); + + ret = rtase_init_ring(dev); + if (ret) { + netdev_err(dev, "unable to init ring\n"); + rtase_free_desc(tp); + return -ENOMEM; + } + + rtase_hw_config(dev); + /* always link, so start to transmit & receive */ + rtase_hw_start(dev); + + netif_device_attach(dev); +out: + + return 0; +} + +static const struct dev_pm_ops rtase_pm_ops = { + SYSTEM_SLEEP_PM_OPS(rtase_suspend, rtase_resume) +}; + +static struct pci_driver rtase_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = rtase_pci_tbl, + .probe = rtase_init_one, + .remove = rtase_remove_one, + .shutdown = rtase_shutdown, + .driver.pm = pm_ptr(&rtase_pm_ops), +}; + +module_pci_driver(rtase_pci_driver); diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 9893c91af105..a7de5cf6b317 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1052,6 +1052,7 @@ struct ravb_hw_info { netdev_features_t net_features; int stats_len; u32 tccr_mask; + u32 tx_max_frame_size; u32 rx_max_frame_size; u32 rx_buffer_size; u32 rx_desc_size; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c02fb296bf7d..d2a6518532f3 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -555,8 +555,16 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) static void ravb_emac_init_rcar(struct net_device *ndev) { - /* Receive frame limit set register */ - ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR); + struct ravb_private *priv = netdev_priv(ndev); + + 
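/* (Worked numbers for the RFLR write explained below, assuming the
 * R-Car Gen3 table later in this diff: rx_max_frame_size = SZ_2K and
 * ETH_FCS_LEN = 4, so RFLR is programmed to 2048 + 4 = 2052 bytes.)
 */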
/* Set receive frame length + * + * The length set here describes the frame from the destination address + * up to and including the CRC data. However only the frame data, + * excluding the CRC, are transferred to memory. To allow for the + * largest frames add the CRC length to the maximum Rx descriptor size. + */ + ravb_write(ndev, priv->info->rx_max_frame_size + ETH_FCS_LEN, RFLR); /* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */ ravb_write(ndev, ECMR_ZPF | ECMR_DM | @@ -1744,8 +1752,6 @@ static int ravb_get_ts_info(struct net_device *ndev, info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -1756,6 +1762,8 @@ static int ravb_get_ts_info(struct net_device *ndev, (1 << HWTSTAMP_FILTER_ALL); if (hw_info->gptp || hw_info->ccc_gac) info->phc_index = ptp_clock_index(priv->ptp.clock); + else + info->phc_index = 0; return 0; } @@ -2674,6 +2682,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .tx_max_frame_size = SZ_2K, .rx_max_frame_size = SZ_2K, .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), @@ -2696,6 +2705,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .tx_max_frame_size = SZ_2K, .rx_max_frame_size = SZ_2K, .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), @@ -2721,6 +2731,7 @@ static const struct ravb_hw_info ravb_gen4_hw_info = { .net_features = NETIF_F_RXCSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats), .tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3, + .tx_max_frame_size = SZ_2K, .rx_max_frame_size = SZ_2K, .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), @@ -2770,6 +2781,7 @@ static const struct ravb_hw_info gbeth_hw_info = { .net_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM, .stats_len = ARRAY_SIZE(ravb_gstrings_stats_gbeth), .tccr_mask = TCCR_TSRQ0, + .tx_max_frame_size = 1522, .rx_max_frame_size = SZ_8K, .rx_buffer_size = SZ_2K, .rx_desc_size = sizeof(struct ravb_rx_desc), @@ -2981,7 +2993,7 @@ static int ravb_probe(struct platform_device *pdev) priv->avb_link_active_low = of_property_read_bool(np, "renesas,ether-link-active-low"); - ndev->max_mtu = info->rx_max_frame_size - + ndev->max_mtu = info->tx_max_frame_size - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); ndev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index ff50e20856ec..b80aa27a7214 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1815,8 +1815,6 @@ static int rswitch_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts info->phc_index = ptp_clock_index(rdev->priv->ptp_priv->clock); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 0e6cea42f007..f9f63c61d792 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1219,8 +1219,6 @@ static int 
rtsn_get_ts_info(struct net_device *ndev, info->phc_index = ptp_clock_index(priv->ptp_priv->clock); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index e097ce3e69ea..84fa911c78db 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2575,7 +2575,8 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx); rocker_carrier_init(rocker_port); - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG; + dev->features |= NETIF_F_SG; + dev->netns_local = true; /* MTU range: 68 - 9000 */ dev->min_mtu = ROCKER_PORT_MIN_MTU; diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index c672f92d65e9..9319a2675e7b 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -847,9 +847,11 @@ static void ether3_remove(struct expansion_card *ec) { struct net_device *dev = ecard_get_drvdata(ec); + ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2); ecard_set_drvdata(ec, NULL); unregister_netdev(dev); + del_timer_sync(&priv(dev)->timer); free_netdev(dev); ecard_release_resources(ec); } diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 7d69302ffa0a..de131fc5fa0b 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4302,3 +4302,130 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .sensor_event = efx_mcdi_sensor_event, .rx_recycle_ring_size = efx_ef10_recycle_ring_size, }; + +const struct efx_nic_type efx_x4_nic_type = { + .is_vf = false, + .mem_bar = efx_ef10_pf_mem_bar, + .mem_map_size = efx_ef10_mem_map_size, + .probe = efx_ef10_probe_pf, + .remove = efx_ef10_remove, + .dimension_resources = efx_ef10_dimension_resources, + .init = efx_ef10_init_nic, + .fini = efx_ef10_fini_nic, + .map_reset_reason = efx_ef10_map_reset_reason, + .map_reset_flags = efx_ef10_map_reset_flags, + .reset = efx_ef10_reset, + .probe_port = efx_mcdi_port_probe, + .remove_port = efx_mcdi_port_remove, + .fini_dmaq = efx_fini_dmaq, + .prepare_flr = efx_ef10_prepare_flr, + .finish_flr = efx_port_dummy_op_void, + .describe_stats = efx_ef10_describe_stats, + .update_stats = efx_ef10_update_stats_pf, + .start_stats = efx_mcdi_mac_start_stats, + .pull_stats = efx_mcdi_mac_pull_stats, + .stop_stats = efx_mcdi_mac_stop_stats, + .push_irq_moderation = efx_ef10_push_irq_moderation, + .reconfigure_mac = efx_ef10_mac_reconfigure, + .check_mac_fault = efx_mcdi_mac_check_fault, + .reconfigure_port = efx_mcdi_port_reconfigure, + .get_wol = efx_ef10_get_wol, + .set_wol = efx_ef10_set_wol, + .resume_wol = efx_port_dummy_op_void, + .get_fec_stats = efx_ef10_get_fec_stats, + .test_chip = efx_ef10_test_chip, + .test_nvram = efx_mcdi_nvram_test_all, + .mcdi_request = efx_ef10_mcdi_request, + .mcdi_poll_response = efx_ef10_mcdi_poll_response, + .mcdi_read_response = efx_ef10_mcdi_read_response, + .mcdi_poll_reboot = efx_ef10_mcdi_poll_reboot, + .mcdi_reboot_detected = efx_ef10_mcdi_reboot_detected, + .irq_enable_master = efx_port_dummy_op_void, + .irq_test_generate = efx_ef10_irq_test_generate, + .irq_disable_non_ev = efx_port_dummy_op_void, + .irq_handle_msi = efx_ef10_msi_interrupt, + .tx_probe = efx_ef10_tx_probe, + 
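/* The TX/RX datapath and filter hooks below reuse the existing EF10/MCDI handlers unchanged. */ +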
.tx_init = efx_ef10_tx_init, + .tx_write = efx_ef10_tx_write, + .tx_limit_len = efx_ef10_tx_limit_len, + .tx_enqueue = __efx_enqueue_skb, + .rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config, + .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config, + .rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config, + .rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config, + .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts, + .rx_probe = efx_mcdi_rx_probe, + .rx_init = efx_mcdi_rx_init, + .rx_remove = efx_mcdi_rx_remove, + .rx_write = efx_ef10_rx_write, + .rx_defer_refill = efx_ef10_rx_defer_refill, + .rx_packet = __efx_rx_packet, + .ev_probe = efx_mcdi_ev_probe, + .ev_init = efx_ef10_ev_init, + .ev_fini = efx_mcdi_ev_fini, + .ev_remove = efx_mcdi_ev_remove, + .ev_process = efx_ef10_ev_process, + .ev_read_ack = efx_ef10_ev_read_ack, + .ev_test_generate = efx_ef10_ev_test_generate, + .filter_table_probe = efx_ef10_filter_table_probe, + .filter_table_restore = efx_mcdi_filter_table_restore, + .filter_table_remove = efx_ef10_filter_table_remove, + .filter_insert = efx_mcdi_filter_insert, + .filter_remove_safe = efx_mcdi_filter_remove_safe, + .filter_get_safe = efx_mcdi_filter_get_safe, + .filter_clear_rx = efx_mcdi_filter_clear_rx, + .filter_count_rx_used = efx_mcdi_filter_count_rx_used, + .filter_get_rx_id_limit = efx_mcdi_filter_get_rx_id_limit, + .filter_get_rx_ids = efx_mcdi_filter_get_rx_ids, +#ifdef CONFIG_RFS_ACCEL + .filter_rfs_expire_one = efx_mcdi_filter_rfs_expire_one, +#endif +#ifdef CONFIG_SFC_MTD + .mtd_probe = efx_ef10_mtd_probe, + .mtd_rename = efx_mcdi_mtd_rename, + .mtd_read = efx_mcdi_mtd_read, + .mtd_erase = efx_mcdi_mtd_erase, + .mtd_write = efx_mcdi_mtd_write, + .mtd_sync = efx_mcdi_mtd_sync, +#endif + .ptp_write_host_time = efx_ef10_ptp_write_host_time, + .ptp_set_ts_sync_events = efx_ef10_ptp_set_ts_sync_events, + .ptp_set_ts_config = efx_ef10_ptp_set_ts_config, + .vlan_rx_add_vid = efx_ef10_vlan_rx_add_vid, + .vlan_rx_kill_vid = efx_ef10_vlan_rx_kill_vid, + .udp_tnl_push_ports = efx_ef10_udp_tnl_push_ports, + .udp_tnl_has_port = efx_ef10_udp_tnl_has_port, +#ifdef CONFIG_SFC_SRIOV + /* currently set to the VF versions of these functions + * because SRIOV will be reimplemented later. 
+ */ + .vswitching_probe = efx_ef10_vswitching_probe_vf, + .vswitching_restore = efx_ef10_vswitching_restore_vf, + .vswitching_remove = efx_ef10_vswitching_remove_vf, +#endif + .get_mac_address = efx_ef10_get_mac_address_pf, + .set_mac_address = efx_ef10_set_mac_address, + .tso_versions = efx_ef10_tso_versions, + + .get_phys_port_id = efx_ef10_get_phys_port_id, + .revision = EFX_REV_X4, + .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH), + .rx_prefix_size = ES_DZ_RX_PREFIX_SIZE, + .rx_hash_offset = ES_DZ_RX_PREFIX_HASH_OFST, + .rx_ts_offset = ES_DZ_RX_PREFIX_TSTAMP_OFST, + .can_rx_scatter = true, + .always_rx_scatter = true, + .option_descriptors = true, + .min_interrupt_mode = EFX_INT_MODE_MSIX, + .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, + .offload_features = EF10_OFFLOAD_FEATURES, + .mcdi_max_ver = 2, + .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, + .hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE | + 1 << HWTSTAMP_FILTER_ALL, + .check_caps = ef10_check_caps, + .print_additional_fwver = efx_ef10_print_additional_fwver, + .sensor_event = efx_mcdi_sensor_event, + .rx_recycle_ring_size = efx_ef10_recycle_ring_size, +}; + diff --git a/drivers/net/ethernet/sfc/ef100_ethtool.c b/drivers/net/ethernet/sfc/ef100_ethtool.c index 896ffca4aee2..5c2551369812 100644 --- a/drivers/net/ethernet/sfc/ef100_ethtool.c +++ b/drivers/net/ethernet/sfc/ef100_ethtool.c @@ -37,7 +37,6 @@ ef100_ethtool_get_ringparam(struct net_device *net_dev, /* Ethtool options available */ const struct ethtool_ops ef100_ethtool_ops = { - .cap_rss_ctx_supported = true, .get_drvinfo = efx_ethtool_get_drvinfo, .get_msglevel = efx_ethtool_get_msglevel, .set_msglevel = efx_ethtool_set_msglevel, @@ -59,6 +58,7 @@ const struct ethtool_ops ef100_ethtool_ops = { .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_key_size = efx_ethtool_get_rxfh_key_size, + .rxfh_per_ctx_key = true, .rxfh_priv_size = sizeof(struct efx_rss_context_priv), .get_rxfh = efx_ethtool_get_rxfh, .set_rxfh = efx_ethtool_set_rxfh, diff --git a/drivers/net/ethernet/sfc/ef100_rep.c b/drivers/net/ethernet/sfc/ef100_rep.c index 0b3083ef0ead..e923e1796369 100644 --- a/drivers/net/ethernet/sfc/ef100_rep.c +++ b/drivers/net/ethernet/sfc/ef100_rep.c @@ -233,8 +233,8 @@ static struct efx_rep *efx_ef100_rep_create_netdev(struct efx_nic *efx, net_dev->ethtool_ops = &efx_ef100_rep_ethtool_ops; net_dev->min_mtu = EFX_MIN_MTU; net_dev->max_mtu = EFX_MAX_MTU; - net_dev->features |= NETIF_F_LLTX; - net_dev->hw_features |= NETIF_F_LLTX; + net_dev->lltx = true; + return efv; fail1: free_netdev(net_dev); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 6f1a01ded7d4..36b3b57e2055 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -821,6 +821,10 @@ static const struct pci_device_id efx_pci_table[] = { .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03), /* SFC9250 VF */ .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0c03), /* X4 PF (FF/LL) */ + .driver_data = (unsigned long)&efx_x4_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x2c03), /* X4 PF (FF only) */ + .driver_data = (unsigned long)&efx_x4_nic_type}, {0} /* end of list */ }; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 7c887160e2ef..bb1930818beb 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -230,17 +230,11 @@ static int 
efx_ethtool_get_ts_info(struct net_device *net_dev, { struct efx_nic *efx = efx_netdev_priv(net_dev); - /* Software capabilities */ - ts_info->so_timestamping = (SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE); - ts_info->phc_index = -1; - efx_ptp_get_ts_info(efx, ts_info); return 0; } const struct ethtool_ops efx_ethtool_ops = { - .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USECS_IRQ | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, @@ -268,6 +262,7 @@ const struct ethtool_ops efx_ethtool_ops = { .set_rxnfc = efx_ethtool_set_rxnfc, .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_key_size = efx_ethtool_get_rxfh_key_size, + .rxfh_per_ctx_key = true, .rxfh_priv_size = sizeof(struct efx_rss_context_priv), .get_rxfh = efx_ethtool_get_rxfh, .set_rxfh = efx_ethtool_set_rxfh, diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 1db64fc6e909..9fa5c4c713ab 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -211,4 +211,6 @@ int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb, extern const struct efx_nic_type efx_hunt_a0_nic_type; extern const struct efx_nic_type efx_hunt_a0_vf_nic_type; +extern const struct efx_nic_type efx_x4_nic_type; + #endif /* EFX_NIC_H */ diff --git a/drivers/net/ethernet/sfc/nic_common.h b/drivers/net/ethernet/sfc/nic_common.h index 466df5348b29..7ec4ac7b7ff5 100644 --- a/drivers/net/ethernet/sfc/nic_common.h +++ b/drivers/net/ethernet/sfc/nic_common.h @@ -21,6 +21,7 @@ enum { */ EFX_REV_HUNT_A0 = 4, EFX_REV_EF100 = 5, + EFX_REV_X4 = 6, }; static inline int efx_nic_rev(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 6fd2fdbaa418..aaacdcfa54ae 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -884,7 +884,7 @@ static void efx_ptp_read_timeset(MCDI_DECLARE_STRUCT_PTR(data), timeset->host_start = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTSTART); timeset->major = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MAJOR); timeset->minor = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MINOR); - timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND), + timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND); timeset->wait = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_WAITNS); /* Ignore seconds */ diff --git a/drivers/net/ethernet/sfc/siena/efx_common.c b/drivers/net/ethernet/sfc/siena/efx_common.c index cf195162e270..a0966f879664 100644 --- a/drivers/net/ethernet/sfc/siena/efx_common.c +++ b/drivers/net/ethernet/sfc/siena/efx_common.c @@ -725,7 +725,6 @@ void efx_siena_reset_down(struct efx_nic *efx, enum reset_type method) mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); - mutex_lock(&efx->rss_lock); efx->type->fini(efx); } @@ -786,9 +785,6 @@ int efx_siena_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) " VFs may not function\n", rc); #endif - if (efx->type->rx_restore_rss_contexts) - efx->type->rx_restore_rss_contexts(efx); - mutex_unlock(&efx->rss_lock); efx->type->filter_table_restore(efx); up_write(&efx->filter_sem); if (efx->type->sriov_reset) @@ -806,7 +802,6 @@ int efx_siena_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) fail: efx->port_initialized = false; - mutex_unlock(&efx->rss_lock); up_write(&efx->filter_sem); mutex_unlock(&efx->mac_lock); @@ -1016,9 +1011,7 @@ int efx_siena_init_struct(struct efx_nic *efx, efx->type->rx_hash_offset - efx->type->rx_prefix_size; efx->rx_packet_ts_offset = 
efx->type->rx_ts_offset - efx->type->rx_prefix_size; - INIT_LIST_HEAD(&efx->rss_context.list); efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - mutex_init(&efx->rss_lock); efx->vport_id = EVB_PORT_ID_ASSIGNED; spin_lock_init(&efx->stats_lock); efx->vi_stride = EFX_DEFAULT_VI_STRIDE; diff --git a/drivers/net/ethernet/sfc/siena/ethtool.c b/drivers/net/ethernet/sfc/siena/ethtool.c index 4c182d4edfc2..c5ad84db9613 100644 --- a/drivers/net/ethernet/sfc/siena/ethtool.c +++ b/drivers/net/ethernet/sfc/siena/ethtool.c @@ -230,17 +230,11 @@ static int efx_ethtool_get_ts_info(struct net_device *net_dev, { struct efx_nic *efx = netdev_priv(net_dev); - /* Software capabilities */ - ts_info->so_timestamping = (SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE); - ts_info->phc_index = -1; - efx_siena_ptp_get_ts_info(efx, ts_info); return 0; } const struct ethtool_ops efx_siena_ethtool_ops = { - .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USECS_IRQ | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, diff --git a/drivers/net/ethernet/sfc/siena/ethtool_common.c b/drivers/net/ethernet/sfc/siena/ethtool_common.c index 5f0a8127e967..075fef64de68 100644 --- a/drivers/net/ethernet/sfc/siena/ethtool_common.c +++ b/drivers/net/ethernet/sfc/siena/ethtool_common.c @@ -820,27 +820,16 @@ int efx_siena_ethtool_get_rxnfc(struct net_device *net_dev, return 0; case ETHTOOL_GRXFH: { - struct efx_rss_context *ctx = &efx->rss_context; __u64 data; - mutex_lock(&efx->rss_lock); - if (info->flow_type & FLOW_RSS && info->rss_context) { - ctx = efx_siena_find_rss_context_entry(efx, - info->rss_context); - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } - } - data = 0; - if (!efx_rss_active(ctx)) /* No RSS */ - goto out_setdata_unlock; + if (!efx_rss_active(&efx->rss_context)) /* No RSS */ + goto out_setdata; - switch (info->flow_type & ~FLOW_RSS) { + switch (info->flow_type) { case UDP_V4_FLOW: case UDP_V6_FLOW: - if (ctx->rx_hash_udp_4tuple) + if (efx->rss_context.rx_hash_udp_4tuple) data = (RXH_L4_B_0_1 | RXH_L4_B_2_3 | RXH_IP_SRC | RXH_IP_DST); else @@ -862,10 +851,8 @@ int efx_siena_ethtool_get_rxnfc(struct net_device *net_dev, default: break; } -out_setdata_unlock: +out_setdata: info->data = data; -out_unlock: - mutex_unlock(&efx->rss_lock); return rc; } @@ -1164,47 +1151,12 @@ u32 efx_siena_ethtool_get_rxfh_key_size(struct net_device *net_dev) return efx->type->rx_hash_key_size; } -static int efx_siena_ethtool_get_rxfh_context(struct net_device *net_dev, - struct ethtool_rxfh_param *rxfh) -{ - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_rss_context *ctx; - int rc = 0; - - if (!efx->type->rx_pull_rss_context_config) - return -EOPNOTSUPP; - - mutex_lock(&efx->rss_lock); - ctx = efx_siena_find_rss_context_entry(efx, rxfh->rss_context); - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } - rc = efx->type->rx_pull_rss_context_config(efx, ctx); - if (rc) - goto out_unlock; - - rxfh->hfunc = ETH_RSS_HASH_TOP; - if (rxfh->indir) - memcpy(rxfh->indir, ctx->rx_indir_table, - sizeof(ctx->rx_indir_table)); - if (rxfh->key) - memcpy(rxfh->key, ctx->rx_hash_key, - efx->type->rx_hash_key_size); -out_unlock: - mutex_unlock(&efx->rss_lock); - return rc; -} - int efx_siena_ethtool_get_rxfh(struct net_device *net_dev, struct ethtool_rxfh_param *rxfh) { struct efx_nic *efx = netdev_priv(net_dev); int rc; - if (rxfh->rss_context) - return efx_siena_ethtool_get_rxfh_context(net_dev, rxfh); - rc = efx->type->rx_pull_rss_config(efx); if (rc) return rc; @@ -1219,70 
+1171,6 @@ int efx_siena_ethtool_get_rxfh(struct net_device *net_dev, return 0; } -static int efx_siena_ethtool_set_rxfh_context(struct net_device *net_dev, - struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) -{ - struct efx_nic *efx = netdev_priv(net_dev); - u32 *rss_context = &rxfh->rss_context; - struct efx_rss_context *ctx; - u32 *indir = rxfh->indir; - bool allocated = false; - u8 *key = rxfh->key; - int rc; - - if (!efx->type->rx_push_rss_context_config) - return -EOPNOTSUPP; - - mutex_lock(&efx->rss_lock); - - if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - if (rxfh->rss_delete) { - /* alloc + delete == Nothing to do */ - rc = -EINVAL; - goto out_unlock; - } - ctx = efx_siena_alloc_rss_context_entry(efx); - if (!ctx) { - rc = -ENOMEM; - goto out_unlock; - } - ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - /* Initialise indir table and key to defaults */ - efx_siena_set_default_rx_indir_table(efx, ctx); - netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key)); - allocated = true; - } else { - ctx = efx_siena_find_rss_context_entry(efx, *rss_context); - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } - } - - if (rxfh->rss_delete) { - /* delete this context */ - rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL); - if (!rc) - efx_siena_free_rss_context_entry(ctx); - goto out_unlock; - } - - if (!key) - key = ctx->rx_hash_key; - if (!indir) - indir = ctx->rx_indir_table; - - rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key); - if (rc && allocated) - efx_siena_free_rss_context_entry(ctx); - else - *rss_context = ctx->user_id; -out_unlock: - mutex_unlock(&efx->rss_lock); - return rc; -} - int efx_siena_ethtool_set_rxfh(struct net_device *net_dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) @@ -1296,9 +1184,6 @@ int efx_siena_ethtool_set_rxfh(struct net_device *net_dev, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->rss_context) - efx_siena_ethtool_set_rxfh_context(net_dev, rxfh, extack); - if (!indir && !key) return 0; diff --git a/drivers/net/ethernet/sfc/siena/net_driver.h b/drivers/net/ethernet/sfc/siena/net_driver.h index 94152f595acd..3fa7c652ae9b 100644 --- a/drivers/net/ethernet/sfc/siena/net_driver.h +++ b/drivers/net/ethernet/sfc/siena/net_driver.h @@ -707,20 +707,14 @@ struct vfdi_status; /* The reserved RSS context value */ #define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff /** - * struct efx_rss_context - A user-defined RSS context for filtering - * @list: node of linked list on which this struct is stored - * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or - * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC. - * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. - * @user_id: the rss_context ID exposed to userspace over ethtool. + * struct efx_rss_context - An RSS context for filtering + * @context_id: 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. 
* @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @rx_hash_key: Toeplitz hash key for this RSS context * @rx_indir_table: Indirection table for this RSS context */ struct efx_rss_context { - struct list_head list; u32 context_id; - u32 user_id; bool rx_hash_udp_4tuple; u8 rx_hash_key[40]; u32 rx_indir_table[128]; @@ -851,9 +845,7 @@ enum efx_xdp_tx_queues_mode { * @rx_packet_ts_offset: Offset of timestamp from start of packet data * (valid only if channel->sync_timestamps_enabled; always negative) * @rx_scatter: Scatter mode enabled for receives - * @rss_context: Main RSS context. Its @list member is the head of the list of - * RSS contexts created by user requests - * @rss_lock: Protects custom RSS context software state in @rss_context.list + * @rss_context: Main RSS context * @vport_id: The function's vport ID, only relevant for PFs * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired @@ -1018,7 +1010,6 @@ struct efx_nic { int rx_packet_ts_offset; bool rx_scatter; struct efx_rss_context rss_context; - struct mutex rss_lock; u32 vport_id; unsigned int_error_count; @@ -1220,10 +1211,6 @@ struct efx_udp_tunnel { * @tx_enqueue: Add an SKB to TX queue * @rx_push_rss_config: Write RSS hash key and indirection table to the NIC * @rx_pull_rss_config: Read RSS hash key and indirection table back from the NIC - * @rx_push_rss_context_config: Write RSS hash key and indirection table for - * user RSS context to the NIC - * @rx_pull_rss_context_config: Read RSS hash key and indirection table for user - * RSS context back from the NIC * @rx_probe: Allocate resources for RX queue * @rx_init: Initialise RX queue on the NIC * @rx_remove: Free resources for RX queue @@ -1366,13 +1353,6 @@ struct efx_nic_type { int (*rx_push_rss_config)(struct efx_nic *efx, bool user, const u32 *rx_indir_table, const u8 *key); int (*rx_pull_rss_config)(struct efx_nic *efx); - int (*rx_push_rss_context_config)(struct efx_nic *efx, - struct efx_rss_context *ctx, - const u32 *rx_indir_table, - const u8 *key); - int (*rx_pull_rss_context_config)(struct efx_nic *efx, - struct efx_rss_context *ctx); - void (*rx_restore_rss_contexts)(struct efx_nic *efx); int (*rx_probe)(struct efx_rx_queue *rx_queue); void (*rx_init)(struct efx_rx_queue *rx_queue); void (*rx_remove)(struct efx_rx_queue *rx_queue); diff --git a/drivers/net/ethernet/sfc/siena/ptp.c b/drivers/net/ethernet/sfc/siena/ptp.c index c473a4b6dd44..85005196b4c5 100644 --- a/drivers/net/ethernet/sfc/siena/ptp.c +++ b/drivers/net/ethernet/sfc/siena/ptp.c @@ -897,7 +897,7 @@ static void efx_ptp_read_timeset(MCDI_DECLARE_STRUCT_PTR(data), timeset->host_start = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTSTART); timeset->major = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MAJOR); timeset->minor = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MINOR); - timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND), + timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND); timeset->wait = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_WAITNS); /* Ignore seconds */ diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c index 219fb358a646..082e35c6caaa 100644 --- a/drivers/net/ethernet/sfc/siena/rx_common.c +++ b/drivers/net/ethernet/sfc/siena/rx_common.c @@ -558,62 +558,6 @@ efx_siena_rx_packet_gro(struct efx_channel *channel, napi_gro_frags(napi); } -/* RSS contexts.
We're using linked lists and crappy O(n) algorithms, because - * (a) this is an infrequent control-plane operation and (b) n is small (max 64) - */ -struct efx_rss_context *efx_siena_alloc_rss_context_entry(struct efx_nic *efx) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx, *new; - u32 id = 1; /* Don't use zero, that refers to the master RSS context */ - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - /* Search for first gap in the numbering */ - list_for_each_entry(ctx, head, list) { - if (ctx->user_id != id) - break; - id++; - /* Check for wrap. If this happens, we have nearly 2^32 - * allocated RSS contexts, which seems unlikely. - */ - if (WARN_ON_ONCE(!id)) - return NULL; - } - - /* Create the new entry */ - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (!new) - return NULL; - new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - new->rx_hash_udp_4tuple = false; - - /* Insert the new entry into the gap */ - new->user_id = id; - list_add_tail(&new->list, &ctx->list); - return new; -} - -struct efx_rss_context *efx_siena_find_rss_context_entry(struct efx_nic *efx, - u32 id) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - list_for_each_entry(ctx, head, list) - if (ctx->user_id == id) - return ctx; - return NULL; -} - -void efx_siena_free_rss_context_entry(struct efx_rss_context *ctx) -{ - list_del(&ctx->list); - kfree(ctx); -} - void efx_siena_set_default_rx_indir_table(struct efx_nic *efx, struct efx_rss_context *ctx) { diff --git a/drivers/net/ethernet/sfc/siena/rx_common.h b/drivers/net/ethernet/sfc/siena/rx_common.h index 6b37f83ecb30..f90a8320d396 100644 --- a/drivers/net/ethernet/sfc/siena/rx_common.h +++ b/drivers/net/ethernet/sfc/siena/rx_common.h @@ -78,10 +78,6 @@ efx_siena_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, unsigned int n_frags, u8 *eh, __wsum csum); -struct efx_rss_context *efx_siena_alloc_rss_context_entry(struct efx_nic *efx); -struct efx_rss_context *efx_siena_find_rss_context_entry(struct efx_nic *efx, - u32 id); -void efx_siena_free_rss_context_entry(struct efx_rss_context *ctx); void efx_siena_set_default_rx_indir_table(struct efx_nic *efx, struct efx_rss_context *ctx); diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index c44088424323..a421b0123506 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ b/drivers/net/ethernet/sfc/tc_counters.c @@ -249,7 +249,7 @@ struct efx_tc_counter_index *efx_tc_flower_get_counter_index( &ctr->linkage, efx_tc_counter_id_ht_params); kfree(ctr); - return (void *)cnt; /* it's an ERR_PTR */ + return ERR_CAST(cnt); } ctr->cnt = cnt; refcount_set(&ctr->ref, 1); diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 907498848028..a5e23e2da90f 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2355,7 +2355,7 @@ static int smc_drv_probe(struct platform_device *pdev) * the resource supplies a trigger, override the irqflags with * the trigger flags from the resource. 
*/ - irq_resflags = irqd_get_trigger_type(irq_get_irq_data(ndev->irq)); + irq_resflags = irq_get_trigger_type(ndev->irq); if (irq_flags == -1 || irq_resflags & IRQF_TRIGGER_MASK) irq_flags = irq_resflags & IRQF_TRIGGER_MASK; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index cd36ff4da68c..684489156dce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -29,6 +29,7 @@ /* Synopsys Core versions */ #define DWMAC_CORE_3_40 0x34 #define DWMAC_CORE_3_50 0x35 +#define DWMAC_CORE_3_70 0x37 #define DWMAC_CORE_4_00 0x40 #define DWMAC_CORE_4_10 0x41 #define DWMAC_CORE_5_00 0x50 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 9e40c28d453a..bfe6e2d631bd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -8,15 +8,90 @@ #include <linux/device.h> #include <linux/of_irq.h> #include "stmmac.h" +#include "dwmac_dma.h" +#include "dwmac1000.h" + +/* Normal Loongson Tx Summary */ +#define DMA_INTR_ENA_NIE_TX_LOONGSON 0x00040000 +/* Normal Loongson Rx Summary */ +#define DMA_INTR_ENA_NIE_RX_LOONGSON 0x00020000 + +#define DMA_INTR_NORMAL_LOONGSON (DMA_INTR_ENA_NIE_TX_LOONGSON | \ + DMA_INTR_ENA_NIE_RX_LOONGSON | \ + DMA_INTR_ENA_RIE | DMA_INTR_ENA_TIE) + +/* Abnormal Loongson Tx Summary */ +#define DMA_INTR_ENA_AIE_TX_LOONGSON 0x00010000 +/* Abnormal Loongson Rx Summary */ +#define DMA_INTR_ENA_AIE_RX_LOONGSON 0x00008000 + +#define DMA_INTR_ABNORMAL_LOONGSON (DMA_INTR_ENA_AIE_TX_LOONGSON | \ + DMA_INTR_ENA_AIE_RX_LOONGSON | \ + DMA_INTR_ENA_FBE | DMA_INTR_ENA_UNE) + +#define DMA_INTR_DEFAULT_MASK_LOONGSON (DMA_INTR_NORMAL_LOONGSON | \ + DMA_INTR_ABNORMAL_LOONGSON) + +/* Normal Loongson Tx Interrupt Summary */ +#define DMA_STATUS_NIS_TX_LOONGSON 0x00040000 +/* Normal Loongson Rx Interrupt Summary */ +#define DMA_STATUS_NIS_RX_LOONGSON 0x00020000 + +/* Abnormal Loongson Tx Interrupt Summary */ +#define DMA_STATUS_AIS_TX_LOONGSON 0x00010000 +/* Abnormal Loongson Rx Interrupt Summary */ +#define DMA_STATUS_AIS_RX_LOONGSON 0x00008000 + +/* Fatal Loongson Tx Bus Error Interrupt */ +#define DMA_STATUS_FBI_TX_LOONGSON 0x00002000 +/* Fatal Loongson Rx Bus Error Interrupt */ +#define DMA_STATUS_FBI_RX_LOONGSON 0x00001000 + +#define DMA_STATUS_MSK_COMMON_LOONGSON (DMA_STATUS_NIS_TX_LOONGSON | \ + DMA_STATUS_NIS_RX_LOONGSON | \ + DMA_STATUS_AIS_TX_LOONGSON | \ + DMA_STATUS_AIS_RX_LOONGSON | \ + DMA_STATUS_FBI_TX_LOONGSON | \ + DMA_STATUS_FBI_RX_LOONGSON) + +#define DMA_STATUS_MSK_RX_LOONGSON (DMA_STATUS_ERI | DMA_STATUS_RWT | \ + DMA_STATUS_RPS | DMA_STATUS_RU | \ + DMA_STATUS_RI | DMA_STATUS_OVF | \ + DMA_STATUS_MSK_COMMON_LOONGSON) + +#define DMA_STATUS_MSK_TX_LOONGSON (DMA_STATUS_ETI | DMA_STATUS_UNF | \ + DMA_STATUS_TJT | DMA_STATUS_TU | \ + DMA_STATUS_TPS | DMA_STATUS_TI | \ + DMA_STATUS_MSK_COMMON_LOONGSON) + +#define PCI_DEVICE_ID_LOONGSON_GMAC 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_GNET 0x7a13 +#define DWMAC_CORE_LS_MULTICHAN 0x10 /* Loongson custom ID */ +#define CHANNEL_NUM 8 + +struct loongson_data { + u32 loongson_id; + struct device *dev; +}; + +struct stmmac_pci_info { + int (*setup)(struct pci_dev *pdev, struct plat_stmmacenet_data *plat); +}; -static int loongson_default_data(struct plat_stmmacenet_data *plat) +static void loongson_default_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) { + /* Get bus_id, this can be 
overwritten later */ + plat->bus_id = pci_dev_id(pdev); + + plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->has_gmac = 1; plat->force_sf_dma_mode = 1; /* Set default value for multicast hash bins */ - plat->multicast_filter_bins = HASH_TABLE_SIZE; + plat->multicast_filter_bins = 256; + + plat->mac_interface = PHY_INTERFACE_MODE_NA; /* Set default value for unicast filter entries */ plat->unicast_filter_entries = 1; @@ -24,10 +99,6 @@ static int loongson_default_data(struct plat_stmmacenet_data *plat) /* Set the maxmtu to a default of JUMBO_LEN */ plat->maxmtu = JUMBO_LEN; - /* Set default number of RX and TX queues to use */ - plat->tx_queues_to_use = 1; - plat->rx_queues_to_use = 1; - /* Disable Priority config by default */ plat->tx_queues_cfg[0].use_prio = false; plat->rx_queues_cfg[0].use_prio = false; @@ -35,30 +106,424 @@ static int loongson_default_data(struct plat_stmmacenet_data *plat) /* Disable RX queues routing by default */ plat->rx_queues_cfg[0].pkt_route = 0x0; + plat->clk_ref_rate = 125000000; + plat->clk_ptp_rate = 125000000; + /* Default to phy auto-detection */ plat->phy_addr = -1; plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; +} + +static int loongson_gmac_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + struct loongson_data *ld; + int i; + + ld = plat->bsp_priv; + + loongson_default_data(pdev, plat); + + if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) { + plat->rx_queues_to_use = CHANNEL_NUM; + plat->tx_queues_to_use = CHANNEL_NUM; + + /* Only channel 0 supports checksum, + * so turn off checksum to enable multiple channels. + */ + for (i = 1; i < CHANNEL_NUM; i++) + plat->tx_queues_cfg[i].coe_unsupported = 1; + } else { + plat->tx_queues_to_use = 1; + plat->rx_queues_to_use = 1; + } + + plat->phy_interface = PHY_INTERFACE_MODE_RGMII_ID; - plat->multicast_filter_bins = 256; return 0; } -static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static struct stmmac_pci_info loongson_gmac_pci_info = { + .setup = loongson_gmac_data, +}; + +static void loongson_gnet_fix_speed(void *priv, unsigned int speed, + unsigned int mode) { - struct plat_stmmacenet_data *plat; - struct stmmac_resources res; - struct device_node *np; - int ret, i, phy_mode; + struct loongson_data *ld = (struct loongson_data *)priv; + struct net_device *ndev = dev_get_drvdata(ld->dev); + struct stmmac_priv *ptr = netdev_priv(ndev); + + /* The integrated PHY has a weird problem with switching from the low + * speeds to 1000Mbps mode. The speedup procedure requires the PHY-link + * re-negotiation. + */ + if (speed == SPEED_1000) { + if (readl(ptr->ioaddr + MAC_CTRL_REG) & + GMAC_CONTROL_PS) + /* Work around hardware bug, restart autoneg */ + phy_restart_aneg(ndev->phydev); + } +} - np = dev_of_node(&pdev->dev); +static int loongson_gnet_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + struct loongson_data *ld; + int i; - if (!np) { - pr_info("dwmac_loongson_pci: No OF node\n"); - return -ENODEV; + ld = plat->bsp_priv; + + loongson_default_data(pdev, plat); + + if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) { + plat->rx_queues_to_use = CHANNEL_NUM; + plat->tx_queues_to_use = CHANNEL_NUM; + + /* Only channel 0 supports checksum, + * so turn off checksum to enable multiple channels.
+ */ + for (i = 1; i < CHANNEL_NUM; i++) + plat->tx_queues_cfg[i].coe_unsupported = 1; + } else { + plat->tx_queues_to_use = 1; + plat->rx_queues_to_use = 1; } + plat->phy_interface = PHY_INTERFACE_MODE_GMII; + plat->mdio_bus_data->phy_mask = ~(u32)BIT(2); + plat->fix_mac_speed = loongson_gnet_fix_speed; + + return 0; +} + +static struct stmmac_pci_info loongson_gnet_pci_info = { + .setup = loongson_gnet_data, +}; + +static void loongson_dwmac_dma_init_channel(struct stmmac_priv *priv, + void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 chan) +{ + int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; + int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl; + u32 value; + + value = readl(ioaddr + DMA_CHAN_BUS_MODE(chan)); + + if (dma_cfg->pblx8) + value |= DMA_BUS_MODE_MAXPBL; + + value |= DMA_BUS_MODE_USP; + value &= ~(DMA_BUS_MODE_PBL_MASK | DMA_BUS_MODE_RPBL_MASK); + value |= (txpbl << DMA_BUS_MODE_PBL_SHIFT); + value |= (rxpbl << DMA_BUS_MODE_RPBL_SHIFT); + + /* Set the Fixed burst mode */ + if (dma_cfg->fixed_burst) + value |= DMA_BUS_MODE_FB; + + /* Mixed Burst has no effect when fb is set */ + if (dma_cfg->mixed_burst) + value |= DMA_BUS_MODE_MB; + + if (dma_cfg->atds) + value |= DMA_BUS_MODE_ATDS; + + if (dma_cfg->aal) + value |= DMA_BUS_MODE_AAL; + + writel(value, ioaddr + DMA_CHAN_BUS_MODE(chan)); + + /* Mask interrupts by writing to CSR7 */ + writel(DMA_INTR_DEFAULT_MASK_LOONGSON, ioaddr + + DMA_CHAN_INTR_ENA(chan)); +} + +static int loongson_dwmac_dma_interrupt(struct stmmac_priv *priv, + void __iomem *ioaddr, + struct stmmac_extra_stats *x, + u32 chan, u32 dir) +{ + struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); + u32 abnor_intr_status; + u32 nor_intr_status; + u32 fb_intr_status; + u32 intr_status; + int ret = 0; + + /* read the status register (CSR5) */ + intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan)); + + if (dir == DMA_DIR_RX) + intr_status &= DMA_STATUS_MSK_RX_LOONGSON; + else if (dir == DMA_DIR_TX) + intr_status &= DMA_STATUS_MSK_TX_LOONGSON; + + nor_intr_status = intr_status & (DMA_STATUS_NIS_TX_LOONGSON | + DMA_STATUS_NIS_RX_LOONGSON); + abnor_intr_status = intr_status & (DMA_STATUS_AIS_TX_LOONGSON | + DMA_STATUS_AIS_RX_LOONGSON); + fb_intr_status = intr_status & (DMA_STATUS_FBI_TX_LOONGSON | + DMA_STATUS_FBI_RX_LOONGSON); + + /* ABNORMAL interrupts */ + if (unlikely(abnor_intr_status)) { + if (unlikely(intr_status & DMA_STATUS_UNF)) { + ret = tx_hard_error_bump_tc; + x->tx_undeflow_irq++; + } + if (unlikely(intr_status & DMA_STATUS_TJT)) + x->tx_jabber_irq++; + if (unlikely(intr_status & DMA_STATUS_OVF)) + x->rx_overflow_irq++; + if (unlikely(intr_status & DMA_STATUS_RU)) + x->rx_buf_unav_irq++; + if (unlikely(intr_status & DMA_STATUS_RPS)) + x->rx_process_stopped_irq++; + if (unlikely(intr_status & DMA_STATUS_RWT)) + x->rx_watchdog_irq++; + if (unlikely(intr_status & DMA_STATUS_ETI)) + x->tx_early_irq++; + if (unlikely(intr_status & DMA_STATUS_TPS)) { + x->tx_process_stopped_irq++; + ret = tx_hard_error; + } + if (unlikely(fb_intr_status)) { + x->fatal_bus_error_irq++; + ret = tx_hard_error; + } + } + /* TX/RX NORMAL interrupts */ + if (likely(nor_intr_status)) { + if (likely(intr_status & DMA_STATUS_RI)) { + u32 value = readl(ioaddr + DMA_INTR_ENA); + /* to schedule NAPI on real RIE event. 
*/ + if (likely(value & DMA_INTR_ENA_RIE)) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + } + if (likely(intr_status & DMA_STATUS_TI)) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_tx; + } + if (unlikely(intr_status & DMA_STATUS_ERI)) + x->rx_early_irq++; + } + /* Optional hardware blocks, interrupts should be disabled */ + if (unlikely(intr_status & + (DMA_STATUS_GPI | DMA_STATUS_GMI | DMA_STATUS_GLI))) + pr_warn("%s: unexpected status %08x\n", __func__, intr_status); + + /* Clear the interrupt by writing a logic 1 to the CSR5[19-0] */ + writel((intr_status & 0x7ffff), ioaddr + DMA_CHAN_STATUS(chan)); + + return ret; +} + +static struct mac_device_info *loongson_dwmac_setup(void *apriv) +{ + struct stmmac_priv *priv = apriv; + struct mac_device_info *mac; + struct stmmac_dma_ops *dma; + struct loongson_data *ld; + struct pci_dev *pdev; + + ld = priv->plat->bsp_priv; + pdev = to_pci_dev(priv->device); + + mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); + if (!mac) + return NULL; + + dma = devm_kzalloc(priv->device, sizeof(*dma), GFP_KERNEL); + if (!dma) + return NULL; + + /* The Loongson GMAC and GNET devices are based on the DW GMAC + * v3.50a and v3.73a IP-cores. But the HW designers have changed the + * GMAC_VERSION.SNPSVER field to the custom 0x10 value on the + * network controllers with the multi-channels feature + * available to emphasize the differences: multiple DMA-channels, + * AV feature and GMAC_INT_STATUS CSR flags layout. Get back the + * original value so the correct HW-interface would be selected. + */ + if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) { + priv->synopsys_id = DWMAC_CORE_3_70; + *dma = dwmac1000_dma_ops; + dma->init_chan = loongson_dwmac_dma_init_channel; + dma->dma_interrupt = loongson_dwmac_dma_interrupt; + mac->dma = dma; + } + + priv->dev->priv_flags |= IFF_UNICAST_FLT; + + /* Pre-initialize the respective "mac" fields as it's done in + * dwmac1000_setup() + */ + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; + mac->mcast_bits_log2 = 0; + + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + + /* Loongson GMAC doesn't support the flow control. LS2K2000 + * GNET doesn't support the half-duplex link mode. 
+ */ + if (pdev->device == PCI_DEVICE_ID_LOONGSON_GMAC) { + mac->link.caps = MAC_10 | MAC_100 | MAC_1000; + } else { + if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000; + else + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10FD | MAC_100FD | MAC_1000FD; + } + + mac->link.duplex = GMAC_CONTROL_DM; + mac->link.speed10 = GMAC_CONTROL_PS; + mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES; + mac->link.speed1000 = 0; + mac->link.speed_mask = GMAC_CONTROL_PS | GMAC_CONTROL_FES; + mac->mii.addr = GMAC_MII_ADDR; + mac->mii.data = GMAC_MII_DATA; + mac->mii.addr_shift = 11; + mac->mii.addr_mask = 0x0000F800; + mac->mii.reg_shift = 6; + mac->mii.reg_mask = 0x000007C0; + mac->mii.clk_csr_shift = 2; + mac->mii.clk_csr_mask = GENMASK(5, 2); + + return mac; +} + +static int loongson_dwmac_msi_config(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int i, ret, vecs; + + vecs = roundup_pow_of_two(CHANNEL_NUM * 2 + 1); + ret = pci_alloc_irq_vectors(pdev, vecs, vecs, PCI_IRQ_MSI); + if (ret < 0) { + dev_warn(&pdev->dev, "Failed to allocate MSI IRQs\n"); + return ret; + } + + res->irq = pci_irq_vector(pdev, 0); + + for (i = 0; i < plat->rx_queues_to_use; i++) { + res->rx_irq[CHANNEL_NUM - 1 - i] = + pci_irq_vector(pdev, 1 + i * 2); + } + + for (i = 0; i < plat->tx_queues_to_use; i++) { + res->tx_irq[CHANNEL_NUM - 1 - i] = + pci_irq_vector(pdev, 2 + i * 2); + } + + plat->flags |= STMMAC_FLAG_MULTI_MSI_EN; + + return 0; +} + +static void loongson_dwmac_msi_clear(struct pci_dev *pdev) +{ + pci_free_irq_vectors(pdev); +} + +static int loongson_dwmac_dt_config(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + struct device_node *np = dev_of_node(&pdev->dev); + int ret; + + plat->mdio_node = of_get_child_by_name(np, "mdio"); + if (plat->mdio_node) { + dev_info(&pdev->dev, "Found MDIO subnode\n"); + plat->mdio_bus_data->needs_reset = true; + } + + ret = of_alias_get_id(np, "ethernet"); + if (ret >= 0) + plat->bus_id = ret; + + res->irq = of_irq_get_byname(np, "macirq"); + if (res->irq < 0) { + dev_err(&pdev->dev, "IRQ macirq not found\n"); + ret = -ENODEV; + goto err_put_node; + } + + res->wol_irq = of_irq_get_byname(np, "eth_wake_irq"); + if (res->wol_irq < 0) { + dev_info(&pdev->dev, + "IRQ eth_wake_irq not found, using macirq\n"); + res->wol_irq = res->irq; + } + + res->lpi_irq = of_irq_get_byname(np, "eth_lpi"); + if (res->lpi_irq < 0) { + dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); + ret = -ENODEV; + goto err_put_node; + } + + ret = device_get_phy_mode(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "phy_mode not found\n"); + ret = -ENODEV; + goto err_put_node; + } + + plat->phy_interface = ret; + + return 0; + +err_put_node: + of_node_put(plat->mdio_node); + + return ret; +} + +static void loongson_dwmac_dt_clear(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + of_node_put(plat->mdio_node); +} + +static int loongson_dwmac_acpi_config(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + if (!pdev->irq) + return -EINVAL; + + res->irq = pdev->irq; + + return 0; +} + +static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct plat_stmmacenet_data *plat; + struct stmmac_pci_info *info; + struct stmmac_resources res; + struct loongson_data *ld; + int ret, i; + plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); if 
(!plat) return -ENOMEM; @@ -69,25 +534,23 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (!plat->mdio_bus_data) return -ENOMEM; - plat->mdio_node = of_get_child_by_name(np, "mdio"); - if (plat->mdio_node) { - dev_info(&pdev->dev, "Found MDIO subnode\n"); - plat->mdio_bus_data->needs_reset = true; - } - plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); - if (!plat->dma_cfg) { - ret = -ENOMEM; - goto err_put_node; - } + if (!plat->dma_cfg) + return -ENOMEM; + + ld = devm_kzalloc(&pdev->dev, sizeof(*ld), GFP_KERNEL); + if (!ld) + return -ENOMEM; /* Enable pci device */ ret = pci_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); - goto err_put_node; + return ret; } + pci_set_master(pdev); + /* Get the base address of device */ for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (pci_resource_len(pdev, i) == 0) @@ -98,59 +561,43 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id break; } - plat->bus_id = of_alias_get_id(np, "ethernet"); - if (plat->bus_id < 0) - plat->bus_id = pci_dev_id(pdev); - - phy_mode = device_get_phy_mode(&pdev->dev); - if (phy_mode < 0) { - dev_err(&pdev->dev, "phy_mode not found\n"); - ret = phy_mode; - goto err_disable_device; - } - - plat->phy_interface = phy_mode; - plat->mac_interface = PHY_INTERFACE_MODE_GMII; - - pci_set_master(pdev); - - loongson_default_data(plat); - pci_enable_msi(pdev); memset(&res, 0, sizeof(res)); res.addr = pcim_iomap_table(pdev)[0]; - res.irq = of_irq_get_byname(np, "macirq"); - if (res.irq < 0) { - dev_err(&pdev->dev, "IRQ macirq not found\n"); - ret = -ENODEV; - goto err_disable_msi; - } + plat->bsp_priv = ld; + plat->setup = loongson_dwmac_setup; + ld->dev = &pdev->dev; + ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff; - res.wol_irq = of_irq_get_byname(np, "eth_wake_irq"); - if (res.wol_irq < 0) { - dev_info(&pdev->dev, "IRQ eth_wake_irq not found, using macirq\n"); - res.wol_irq = res.irq; - } + info = (struct stmmac_pci_info *)id->driver_data; + ret = info->setup(pdev, plat); + if (ret) + goto err_disable_device; - res.lpi_irq = of_irq_get_byname(np, "eth_lpi"); - if (res.lpi_irq < 0) { - dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); - ret = -ENODEV; - goto err_disable_msi; - } + if (dev_of_node(&pdev->dev)) + ret = loongson_dwmac_dt_config(pdev, plat, &res); + else + ret = loongson_dwmac_acpi_config(pdev, plat, &res); + if (ret) + goto err_disable_device; + + /* Use the common MAC IRQ if per-channel MSIs allocation failed */ + if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) + loongson_dwmac_msi_config(pdev, plat, &res); ret = stmmac_dvr_probe(&pdev->dev, plat, &res); if (ret) - goto err_disable_msi; + goto err_plat_clear; - return ret; + return 0; -err_disable_msi: - pci_disable_msi(pdev); +err_plat_clear: + if (dev_of_node(&pdev->dev)) + loongson_dwmac_dt_clear(pdev, plat); + if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) + loongson_dwmac_msi_clear(pdev); err_disable_device: pci_disable_device(pdev); -err_put_node: - of_node_put(plat->mdio_node); return ret; } @@ -158,11 +605,18 @@ static void loongson_dwmac_remove(struct pci_dev *pdev) { struct net_device *ndev = dev_get_drvdata(&pdev->dev); struct stmmac_priv *priv = netdev_priv(ndev); + struct loongson_data *ld; int i; - of_node_put(priv->plat->mdio_node); + ld = priv->plat->bsp_priv; stmmac_dvr_remove(&pdev->dev); + if (dev_of_node(&pdev->dev)) + loongson_dwmac_dt_clear(pdev, priv->plat); + + if (ld->loongson_id == 
DWMAC_CORE_LS_MULTICHAN) + loongson_dwmac_msi_clear(pdev); + for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (pci_resource_len(pdev, i) == 0) continue; @@ -170,7 +624,6 @@ static void loongson_dwmac_remove(struct pci_dev *pdev) break; } - pci_disable_msi(pdev); pci_disable_device(pdev); } @@ -213,7 +666,8 @@ static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, loongson_dwmac_resume); static const struct pci_device_id loongson_dwmac_id_table[] = { - { PCI_VDEVICE(LOONGSON, 0x7a03) }, + { PCI_DEVICE_DATA(LOONGSON, GMAC, &loongson_gmac_pci_info) }, + { PCI_DEVICE_DATA(LOONGSON, GNET, &loongson_gnet_pci_info) }, {} }; MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); @@ -232,4 +686,5 @@ module_pci_driver(loongson_dwmac_driver); MODULE_DESCRIPTION("Loongson DWMAC PCI driver"); MODULE_AUTHOR("Qing Zhang <zhangqing@loongson.cn>"); +MODULE_AUTHOR("Yanteng Si <siyanteng@loongson.cn>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 7ae04d8d291c..50073bdade46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1116,6 +1116,161 @@ static const struct rk_gmac_ops rk3568_ops = { }, }; +/* VCCIO0_1_3_IOC */ +#define RK3576_VCCIO0_1_3_IOC_CON2 0X6408 +#define RK3576_VCCIO0_1_3_IOC_CON3 0X640c +#define RK3576_VCCIO0_1_3_IOC_CON4 0X6410 +#define RK3576_VCCIO0_1_3_IOC_CON5 0X6414 + +#define RK3576_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3576_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3576_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3576_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) + +#define RK3576_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3576_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +/* SDGMAC_GRF */ +#define RK3576_GRF_GMAC_CON0 0X0020 +#define RK3576_GRF_GMAC_CON1 0X0024 + +#define RK3576_GMAC_RMII_MODE GRF_BIT(3) +#define RK3576_GMAC_RGMII_MODE GRF_CLR_BIT(3) + +#define RK3576_GMAC_CLK_SELECT_IO GRF_BIT(7) +#define RK3576_GMAC_CLK_SELECT_CRU GRF_CLR_BIT(7) + +#define RK3576_GMAC_CLK_RMII_DIV2 GRF_BIT(5) +#define RK3576_GMAC_CLK_RMII_DIV20 GRF_CLR_BIT(5) + +#define RK3576_GMAC_CLK_RGMII_DIV1 \ + (GRF_CLR_BIT(6) | GRF_CLR_BIT(5)) +#define RK3576_GMAC_CLK_RGMII_DIV5 \ + (GRF_BIT(6) | GRF_BIT(5)) +#define RK3576_GMAC_CLK_RGMII_DIV50 \ + (GRF_BIT(6) | GRF_CLR_BIT(5)) + +#define RK3576_GMAC_CLK_RMII_GATE GRF_BIT(4) +#define RK3576_GMAC_CLK_RMII_NOGATE GRF_CLR_BIT(4) + +static void rk3576_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + unsigned int offset_con; + + if (IS_ERR(bsp_priv->grf) || IS_ERR(bsp_priv->php_grf)) { + dev_err(dev, "Missing rockchip,grf or rockchip,php-grf property\n"); + return; + } + + offset_con = bsp_priv->id == 1 ? RK3576_GRF_GMAC_CON1 : + RK3576_GRF_GMAC_CON0; + + regmap_write(bsp_priv->grf, offset_con, RK3576_GMAC_RGMII_MODE); + + offset_con = bsp_priv->id == 1 ? 
RK3576_VCCIO0_1_3_IOC_CON4 : + RK3576_VCCIO0_1_3_IOC_CON2; + + /* m0 && m1 delay enabled */ + regmap_write(bsp_priv->php_grf, offset_con, + DELAY_ENABLE(RK3576, tx_delay, rx_delay)); + regmap_write(bsp_priv->php_grf, offset_con + 0x4, + DELAY_ENABLE(RK3576, tx_delay, rx_delay)); + + /* m0 && m1 delay value */ + regmap_write(bsp_priv->php_grf, offset_con, + RK3576_GMAC_CLK_TX_DL_CFG(tx_delay) | + RK3576_GMAC_CLK_RX_DL_CFG(rx_delay)); + regmap_write(bsp_priv->php_grf, offset_con + 0x4, + RK3576_GMAC_CLK_TX_DL_CFG(tx_delay) | + RK3576_GMAC_CLK_RX_DL_CFG(rx_delay)); +} + +static void rk3576_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + unsigned int offset_con; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + offset_con = bsp_priv->id == 1 ? RK3576_GRF_GMAC_CON1 : + RK3576_GRF_GMAC_CON0; + + regmap_write(bsp_priv->grf, offset_con, RK3576_GMAC_RMII_MODE); +} + +static void rk3576_set_gmac_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + unsigned int val = 0, offset_con; + + switch (speed) { + case 10: + if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) + val = RK3576_GMAC_CLK_RMII_DIV20; + else + val = RK3576_GMAC_CLK_RGMII_DIV50; + break; + case 100: + if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) + val = RK3576_GMAC_CLK_RMII_DIV2; + else + val = RK3576_GMAC_CLK_RGMII_DIV5; + break; + case 1000: + if (bsp_priv->phy_iface != PHY_INTERFACE_MODE_RMII) + val = RK3576_GMAC_CLK_RGMII_DIV1; + else + goto err; + break; + default: + goto err; + } + + offset_con = bsp_priv->id == 1 ? RK3576_GRF_GMAC_CON1 : + RK3576_GRF_GMAC_CON0; + + regmap_write(bsp_priv->grf, offset_con, val); + + return; +err: + dev_err(dev, "unknown speed value for GMAC speed=%d", speed); +} + +static void rk3576_set_clock_selection(struct rk_priv_data *bsp_priv, bool input, + bool enable) +{ + unsigned int val = input ? RK3576_GMAC_CLK_SELECT_IO : + RK3576_GMAC_CLK_SELECT_CRU; + unsigned int offset_con; + + val |= enable ? RK3576_GMAC_CLK_RMII_NOGATE : + RK3576_GMAC_CLK_RMII_GATE; + + offset_con = bsp_priv->id == 1 ? RK3576_GRF_GMAC_CON1 : + RK3576_GRF_GMAC_CON0; + + regmap_write(bsp_priv->grf, offset_con, val); +} + +static const struct rk_gmac_ops rk3576_ops = { + .set_to_rgmii = rk3576_set_to_rgmii, + .set_to_rmii = rk3576_set_to_rmii, + .set_rgmii_speed = rk3576_set_gmac_speed, + .set_rmii_speed = rk3576_set_gmac_speed, + .set_clock_selection = rk3576_set_clock_selection, + .regs_valid = true, + .regs = { + 0x2a220000, /* gmac0 */ + 0x2a230000, /* gmac1 */ + 0x0, /* sentinel */ + }, +}; + /* sys_grf */ #define RK3588_GRF_GMAC_CON7 0X031c #define RK3588_GRF_GMAC_CON8 0X0320 @@ -1141,8 +1296,8 @@ static const struct rk_gmac_ops rk3568_ops = { #define RK3588_GMAC_CLK_RMII_MODE(id) GRF_BIT(5 * (id)) #define RK3588_GMAC_CLK_RGMII_MODE(id) GRF_CLR_BIT(5 * (id)) -#define RK3588_GMAC_CLK_SELET_CRU(id) GRF_BIT(5 * (id) + 4) -#define RK3588_GMAC_CLK_SELET_IO(id) GRF_CLR_BIT(5 * (id) + 4) +#define RK3588_GMAC_CLK_SELECT_CRU(id) GRF_BIT(5 * (id) + 4) +#define RK3588_GMAC_CLK_SELECT_IO(id) GRF_CLR_BIT(5 * (id) + 4) #define RK3588_GMA_CLK_RMII_DIV2(id) GRF_BIT(5 * (id) + 2) #define RK3588_GMA_CLK_RMII_DIV20(id) GRF_CLR_BIT(5 * (id) + 2) @@ -1240,8 +1395,8 @@ err: static void rk3588_set_clock_selection(struct rk_priv_data *bsp_priv, bool input, bool enable) { - unsigned int val = input ? 
RK3588_GMAC_CLK_SELET_IO(bsp_priv->id) : - RK3588_GMAC_CLK_SELET_CRU(bsp_priv->id); + unsigned int val = input ? RK3588_GMAC_CLK_SELECT_IO(bsp_priv->id) : + RK3588_GMAC_CLK_SELECT_CRU(bsp_priv->id); val |= enable ? RK3588_GMAC_CLK_RMII_NOGATE(bsp_priv->id) : RK3588_GMAC_CLK_RMII_GATE(bsp_priv->id); @@ -1908,6 +2063,7 @@ static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops }, { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops }, { .compatible = "rockchip,rk3568-gmac", .data = &rk3568_ops }, + { .compatible = "rockchip,rk3576-gmac", .data = &rk3576_ops }, { .compatible = "rockchip,rk3588-gmac", .data = &rk3588_ops }, { .compatible = "rockchip,rv1108-gmac", .data = &rv1108_ops }, { .compatible = "rockchip,rv1126-gmac", .data = &rv1126_ops }, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index e1b761dcfa1d..4a0ae92b3055 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -299,7 +299,7 @@ static int sun8i_dwmac_dma_reset(void __iomem *ioaddr) * Called from stmmac via stmmac_dma_ops->init */ static void sun8i_dwmac_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, int atds) + struct stmmac_dma_cfg *dma_cfg) { writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN); writel(0x1FFFFFF, ioaddr + EMAC_INT_STA); @@ -395,7 +395,7 @@ static void sun8i_dwmac_dma_start_tx(struct stmmac_priv *priv, writel(v, ioaddr + EMAC_TX_CTL1); } -static void sun8i_dwmac_enable_dma_transmission(void __iomem *ioaddr) +static void sun8i_dwmac_enable_dma_transmission(void __iomem *ioaddr, u32 chan) { u32 v; @@ -774,8 +774,8 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv) static int get_ephy_nodes(struct stmmac_priv *priv) { struct sunxi_priv_data *gmac = priv->plat->bsp_priv; - struct device_node *mdio_mux, *iphynode; struct device_node *mdio_internal; + struct device_node *mdio_mux; int ret; mdio_mux = of_get_child_by_name(priv->device->of_node, "mdio-mux"); @@ -793,7 +793,7 @@ static int get_ephy_nodes(struct stmmac_priv *priv) } /* Seek for internal PHY */ - for_each_child_of_node(mdio_internal, iphynode) { + for_each_child_of_node_scoped(mdio_internal, iphynode) { gmac->ephy_clk = of_clk_get(iphynode, 0); if (IS_ERR(gmac->ephy_clk)) continue; @@ -801,14 +801,12 @@ static int get_ephy_nodes(struct stmmac_priv *priv) if (IS_ERR(gmac->rst_ephy)) { ret = PTR_ERR(gmac->rst_ephy); if (ret == -EPROBE_DEFER) { - of_node_put(iphynode); of_node_put(mdio_internal); return ret; } continue; } dev_info(priv->device, "Found internal PHY node\n"); - of_node_put(iphynode); of_node_put(mdio_internal); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index adccdd816ea9..118a22406a2e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -70,15 +70,17 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) writel(value, ioaddr + DMA_AXI_BUS_MODE); } -static void dwmac1000_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, int atds) +static void dwmac1000_dma_init_channel(struct stmmac_priv *priv, + void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, u32 chan) { - u32 value = readl(ioaddr + DMA_BUS_MODE); int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl; + u32 value; - /* - * 
Set the DMA PBL (Programmable Burst Length) mode. + value = readl(ioaddr + DMA_CHAN_BUS_MODE(chan)); + + /* Set the DMA PBL (Programmable Burst Length) mode. * * Note: before stmmac core 3.50 this mode bit was 4xPBL, and * post 3.5 mode bit acts as 8*PBL. @@ -98,16 +100,16 @@ static void dwmac1000_dma_init(void __iomem *ioaddr, if (dma_cfg->mixed_burst) value |= DMA_BUS_MODE_MB; - if (atds) + if (dma_cfg->atds) value |= DMA_BUS_MODE_ATDS; if (dma_cfg->aal) value |= DMA_BUS_MODE_AAL; - writel(value, ioaddr + DMA_BUS_MODE); + writel(value, ioaddr + DMA_CHAN_BUS_MODE(chan)); /* Mask interrupts by writing to CSR7 */ - writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); + writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_CHAN_INTR_ENA(chan)); } static void dwmac1000_dma_init_rx(struct stmmac_priv *priv, @@ -116,7 +118,7 @@ static void dwmac1000_dma_init_rx(struct stmmac_priv *priv, dma_addr_t dma_rx_phy, u32 chan) { /* RX descriptor base address list must be written into DMA CSR3 */ - writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RCV_BASE_ADDR(chan)); } static void dwmac1000_dma_init_tx(struct stmmac_priv *priv, @@ -125,7 +127,7 @@ static void dwmac1000_dma_init_tx(struct stmmac_priv *priv, dma_addr_t dma_tx_phy, u32 chan) { /* TX descriptor base address list must be written into DMA CSR4 */ - writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); } static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz) @@ -153,7 +155,7 @@ static void dwmac1000_dma_operation_mode_rx(struct stmmac_priv *priv, void __iomem *ioaddr, int mode, u32 channel, int fifosz, u8 qmode) { - u32 csr6 = readl(ioaddr + DMA_CONTROL); + u32 csr6 = readl(ioaddr + DMA_CHAN_CONTROL(channel)); if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable RX store and forward mode\n"); @@ -175,14 +177,14 @@ static void dwmac1000_dma_operation_mode_rx(struct stmmac_priv *priv, /* Configure flow control based on rx fifo size */ csr6 = dwmac1000_configure_fc(csr6, fifosz); - writel(csr6, ioaddr + DMA_CONTROL); + writel(csr6, ioaddr + DMA_CHAN_CONTROL(channel)); } static void dwmac1000_dma_operation_mode_tx(struct stmmac_priv *priv, void __iomem *ioaddr, int mode, u32 channel, int fifosz, u8 qmode) { - u32 csr6 = readl(ioaddr + DMA_CONTROL); + u32 csr6 = readl(ioaddr + DMA_CHAN_CONTROL(channel)); if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable TX store and forward mode\n"); @@ -209,7 +211,7 @@ static void dwmac1000_dma_operation_mode_tx(struct stmmac_priv *priv, csr6 |= DMA_CONTROL_TTC_256; } - writel(csr6, ioaddr + DMA_CONTROL); + writel(csr6, ioaddr + DMA_CHAN_CONTROL(channel)); } static void dwmac1000_dump_dma_regs(struct stmmac_priv *priv, @@ -271,12 +273,12 @@ static int dwmac1000_get_hw_feature(void __iomem *ioaddr, static void dwmac1000_rx_watchdog(struct stmmac_priv *priv, void __iomem *ioaddr, u32 riwt, u32 queue) { - writel(riwt, ioaddr + DMA_RX_WATCHDOG); + writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(queue)); } const struct stmmac_dma_ops dwmac1000_dma_ops = { .reset = dwmac_dma_reset, - .init = dwmac1000_dma_init, + .init_chan = dwmac1000_dma_init_channel, .init_rx_chan = dwmac1000_dma_init_rx, .init_tx_chan = dwmac1000_dma_init_tx, .axi = dwmac1000_dma_axi, @@ -294,3 +296,4 @@ const struct stmmac_dma_ops dwmac1000_dma_ops = { .get_hw_feature = dwmac1000_get_hw_feature, .rx_watchdog = dwmac1000_rx_watchdog, }; +EXPORT_SYMBOL_GPL(dwmac1000_dma_ops); diff --git 
a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c index b402fb54f613..82957db47c99 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c @@ -19,7 +19,7 @@ #include "dwmac_dma.h" static void dwmac100_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, int atds) + struct stmmac_dma_cfg *dma_cfg) { /* Enable Application Access by writing to DMA CSR0 */ writel(DMA_BUS_MODE_DEFAULT | (dma_cfg->pbl << DMA_BUS_MODE_PBL_SHIFT), diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d3c5306f1c41..93a78fd0737b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -573,8 +573,6 @@ static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs, #define GMAC_PHYIF_CTRLSTATUS_LNKSTS BIT(19) #define GMAC_PHYIF_CTRLSTATUS_JABTO BIT(20) #define GMAC_PHYIF_CTRLSTATUS_FALSECARDET BIT(21) -/* LNKMOD */ -#define GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK 0x1 /* LNKSPEED */ #define GMAC_PHYIF_CTRLSTATUS_SPEED_125 0x2 #define GMAC_PHYIF_CTRLSTATUS_SPEED_25 0x1 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f98741d2607e..a1858f083eef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -58,10 +58,6 @@ static void dwmac4_core_init(struct mac_device_info *hw, if (hw->pcs) value |= GMAC_PCS_IRQ_DEFAULT; - /* Enable FPE interrupt */ - if ((GMAC_HW_FEAT_FPESEL & readl(ioaddr + GMAC_HW_FEATURE3)) >> 26) - value |= GMAC_INT_FPE_EN; - writel(value, ioaddr + GMAC_INT_EN); if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE) @@ -786,7 +782,7 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x) else x->pcs_speed = SPEED_10; - x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK); + x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD); pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, x->pcs_duplex ? 
"Full" : "Half"); @@ -1268,6 +1264,9 @@ const struct stmmac_ops dwmac410_ops = { .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, + .fpe_get_add_frag_size = dwmac5_fpe_get_add_frag_size, + .fpe_set_add_frag_size = dwmac5_fpe_set_add_frag_size, + .fpe_map_preemption_class = dwmac5_fpe_map_preemption_class, .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, @@ -1320,6 +1319,9 @@ const struct stmmac_ops dwmac510_ops = { .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, + .fpe_get_add_frag_size = dwmac5_fpe_get_add_frag_size, + .fpe_set_add_frag_size = dwmac5_fpe_set_add_frag_size, + .fpe_map_preemption_class = dwmac5_fpe_map_preemption_class, .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 1c5802e0d7f4..e99401bcc1f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -186,10 +186,12 @@ static void dwmac4_set_tx_owner(struct dma_desc *p) static void dwmac4_set_rx_owner(struct dma_desc *p, int disable_rx_ic) { - p->des3 |= cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR); + u32 flags = (RDES3_OWN | RDES3_BUFFER1_VALID_ADDR); if (!disable_rx_ic) - p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN); + flags |= RDES3_INT_ON_COMPLETION_EN; + + p->des3 |= cpu_to_le32(flags); } static int dwmac4_get_tx_ls(struct dma_desc *p) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 84d3a8551b03..e0165358c4ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -153,7 +153,7 @@ static void dwmac410_dma_init_channel(struct stmmac_priv *priv, } static void dwmac4_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, int atds) + struct stmmac_dma_cfg *dma_cfg) { u32 value = readl(ioaddr + DMA_SYS_BUS_MODE); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e02cebc3f1b7..08add508db84 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -575,11 +575,11 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, u32 num_txq, u32 num_rxq, - bool enable) + bool tx_enable, bool pmac_enable) { u32 value; - if (enable) { + if (tx_enable) { cfg->fpe_csr = EFPE; value = readl(ioaddr + GMAC_RXQ_CTRL1); value &= ~GMAC_RXQCTRL_FPRQ; @@ -589,6 +589,21 @@ void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, cfg->fpe_csr = 0; } writel(cfg->fpe_csr, ioaddr + MAC_FPE_CTRL_STS); + + value = readl(ioaddr + GMAC_INT_EN); + + if (pmac_enable) { + if (!(value & GMAC_INT_FPE_EN)) { + /* Dummy read to clear any pending masked interrupts */ + readl(ioaddr + MAC_FPE_CTRL_STS); + + value |= GMAC_INT_FPE_EN; + } + } else { + value &= ~GMAC_INT_FPE_EN; + } + + writel(value, ioaddr + GMAC_INT_EN); } int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) @@ -605,22 +620,22 @@ int dwmac5_fpe_irq_status(void 
__iomem *ioaddr, struct net_device *dev) if (value & TRSP) { status |= FPE_EVENT_TRSP; - netdev_info(dev, "FPE: Respond mPacket is transmitted\n"); + netdev_dbg(dev, "FPE: Respond mPacket is transmitted\n"); } if (value & TVER) { status |= FPE_EVENT_TVER; - netdev_info(dev, "FPE: Verify mPacket is transmitted\n"); + netdev_dbg(dev, "FPE: Verify mPacket is transmitted\n"); } if (value & RRSP) { status |= FPE_EVENT_RRSP; - netdev_info(dev, "FPE: Respond mPacket is received\n"); + netdev_dbg(dev, "FPE: Respond mPacket is received\n"); } if (value & RVER) { status |= FPE_EVENT_RVER; - netdev_info(dev, "FPE: Verify mPacket is received\n"); + netdev_dbg(dev, "FPE: Verify mPacket is received\n"); } return status; @@ -638,3 +653,72 @@ void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, writel(value, ioaddr + MAC_FPE_CTRL_STS); } + +int dwmac5_fpe_get_add_frag_size(const void __iomem *ioaddr) +{ + return FIELD_GET(DWMAC5_ADD_FRAG_SZ, readl(ioaddr + MTL_FPE_CTRL_STS)); +} + +void dwmac5_fpe_set_add_frag_size(void __iomem *ioaddr, u32 add_frag_size) +{ + u32 value; + + value = readl(ioaddr + MTL_FPE_CTRL_STS); + writel(u32_replace_bits(value, add_frag_size, DWMAC5_ADD_FRAG_SZ), + ioaddr + MTL_FPE_CTRL_STS); +} + +#define ALG_ERR_MSG "TX algorithm SP is not suitable for one-to-many mapping" +#define WEIGHT_ERR_MSG "TXQ weight %u differs across other TXQs in TC: [%u]" + +int dwmac5_fpe_map_preemption_class(struct net_device *ndev, + struct netlink_ext_ack *extack, u32 pclass) +{ + u32 val, offset, count, queue_weight, preemptible_txqs = 0; + struct stmmac_priv *priv = netdev_priv(ndev); + u32 num_tc = ndev->num_tc; + + if (!pclass) + goto update_mapping; + + /* DWMAC CORE4+ can not program TC:TXQ mapping to hardware. + * + * Synopsys Databook: + * "The number of Tx DMA channels is equal to the number of Tx queues, + * and is direct one-to-one mapping." 
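dwmac5_fpe_get_add_frag_size()/dwmac5_fpe_set_add_frag_size() above round-trip the 2-bit addFragSize field (DWMAC5_ADD_FRAG_SZ) through FIELD_GET() and u32_replace_bits(). The ethtool core expresses the same quantity in bytes; the mapping is min_frag = 64 * (1 + addFragSize), which the stock helper encapsulates. A small sketch (add_frag_size_to_bytes() is an illustrative wrapper):

#include <linux/ethtool.h>

/* The 2-bit addFragSize register field maps to bytes as
 * 64 * (1 + addFragSize): 0 -> 64, 1 -> 128, 2 -> 192, 3 -> 256
 * (IEEE 802.3 MAC Merge), which is exactly what the stock helper
 * computes. */
static u32 add_frag_size_to_bytes(u32 add_frag_size)
{
	return ethtool_mm_frag_size_add_to_min(add_frag_size);
}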
+ */ + for (u32 tc = 0; tc < num_tc; tc++) { + count = ndev->tc_to_txq[tc].count; + offset = ndev->tc_to_txq[tc].offset; + + if (pclass & BIT(tc)) + preemptible_txqs |= GENMASK(offset + count - 1, offset); + + /* This is 1:1 mapping, go to next TC */ + if (count == 1) + continue; + + if (priv->plat->tx_sched_algorithm == MTL_TX_ALGORITHM_SP) { + NL_SET_ERR_MSG_MOD(extack, ALG_ERR_MSG); + return -EINVAL; + } + + queue_weight = priv->plat->tx_queues_cfg[offset].weight; + + for (u32 i = 1; i < count; i++) { + if (priv->plat->tx_queues_cfg[offset + i].weight != + queue_weight) { + NL_SET_ERR_MSG_FMT_MOD(extack, WEIGHT_ERR_MSG, + queue_weight, tc); + return -EINVAL; + } + } + } + +update_mapping: + val = readl(priv->ioaddr + MTL_FPE_CTRL_STS); + writel(u32_replace_bits(val, preemptible_txqs, DWMAC5_PREEMPTION_CLASS), + priv->ioaddr + MTL_FPE_CTRL_STS); + + return 0; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index bf33a51d229e..6c6eb6790e83 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -39,6 +39,12 @@ #define MAC_PPSx_INTERVAL(x) (0x00000b88 + ((x) * 0x10)) #define MAC_PPSx_WIDTH(x) (0x00000b8c + ((x) * 0x10)) +#define MTL_FPE_CTRL_STS 0x00000c90 +/* Preemption Classification */ +#define DWMAC5_PREEMPTION_CLASS GENMASK(15, 8) +/* Additional Fragment Size of preempted frames */ +#define DWMAC5_ADD_FRAG_SZ GENMASK(1, 0) + #define MTL_RXP_CONTROL_STATUS 0x00000ca0 #define RXPI BIT(31) #define NPE GENMASK(23, 16) @@ -104,10 +110,14 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, u32 sub_second_inc, u32 systime_flags); void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, u32 num_txq, u32 num_rxq, - bool enable); + bool tx_enable, bool pmac_enable); void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev); +int dwmac5_fpe_get_add_frag_size(const void __iomem *ioaddr); +void dwmac5_fpe_set_add_frag_size(void __iomem *ioaddr, u32 add_frag_size); +int dwmac5_fpe_map_preemption_class(struct net_device *ndev, + struct netlink_ext_ack *extack, u32 pclass); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 72672391675f..5d9c18f5bbf5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -22,6 +22,31 @@ #define DMA_INTR_ENA 0x0000101c /* Interrupt Enable */ #define DMA_MISSED_FRAME_CTR 0x00001020 /* Missed Frame Counter */ +/* Following DMA defines are channels oriented */ +#define DMA_CHAN_BASE_OFFSET 0x100 + +static inline u32 dma_chan_base_addr(u32 base, u32 chan) +{ + return base + chan * DMA_CHAN_BASE_OFFSET; +} + +#define DMA_CHAN_BUS_MODE(chan) dma_chan_base_addr(DMA_BUS_MODE, chan) +#define DMA_CHAN_XMT_POLL_DEMAND(chan) \ + dma_chan_base_addr(DMA_XMT_POLL_DEMAND, chan) +#define DMA_CHAN_RCV_POLL_DEMAND(chan) \ + dma_chan_base_addr(DMA_RCV_POLL_DEMAND, chan) +#define DMA_CHAN_RCV_BASE_ADDR(chan) \ + dma_chan_base_addr(DMA_RCV_BASE_ADDR, chan) +#define DMA_CHAN_TX_BASE_ADDR(chan) \ + dma_chan_base_addr(DMA_TX_BASE_ADDR, chan) +#define DMA_CHAN_STATUS(chan) dma_chan_base_addr(DMA_STATUS, chan) +#define DMA_CHAN_CONTROL(chan) dma_chan_base_addr(DMA_CONTROL, chan) +#define DMA_CHAN_INTR_ENA(chan) dma_chan_base_addr(DMA_INTR_ENA, chan) +#define 
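The mapping loop above turns a per-traffic-class preemptible bitmap (pclass) into a per-TX-queue bitmap by expanding each TC's contiguous queue range with GENMASK(). A condensed restatement with a worked example; the two-TC layout in the comment is hypothetical:

#include <linux/bits.h>
#include <linux/netdevice.h>

/* Condensed form of the loop above.  Hypothetical layout: tc0 owns
 * queues 0..1, tc1 owns queues 2..3, pclass == BIT(1); the result is
 * GENMASK(3, 2) == 0xc, i.e. only tc1's queues become preemptible. */
static u32 tc_bitmap_to_txq_bitmap(const struct net_device *ndev, u32 pclass)
{
	u32 num_tc = ndev->num_tc;
	u32 txqs = 0;

	for (u32 tc = 0; tc < num_tc; tc++) {
		if (!(pclass & BIT(tc)))
			continue;

		/* each TC owns the contiguous range [offset, offset+count) */
		txqs |= GENMASK(ndev->tc_to_txq[tc].offset +
				ndev->tc_to_txq[tc].count - 1,
				ndev->tc_to_txq[tc].offset);
	}

	return txqs;
}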
DMA_CHAN_MISSED_FRAME_CTR(chan) \ + dma_chan_base_addr(DMA_MISSED_FRAME_CTR, chan) +#define DMA_CHAN_RX_WATCHDOG(chan) \ + dma_chan_base_addr(DMA_RX_WATCHDOG, chan) + /* SW Reset */ #define DMA_BUS_MODE_SFT_RESET 0x00000001 /* Software Reset */ @@ -152,7 +177,7 @@ #define NUM_DWMAC1000_DMA_REGS 23 #define NUM_DWMAC4_DMA_REGS 27 -void dwmac_enable_dma_transmission(void __iomem *ioaddr); +void dwmac_enable_dma_transmission(void __iomem *ioaddr, u32 chan); void dwmac_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 85e18f9a22f9..4846bf49c576 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -28,65 +28,65 @@ int dwmac_dma_reset(void __iomem *ioaddr) } /* CSR1 enables the transmit DMA to check for new descriptor */ -void dwmac_enable_dma_transmission(void __iomem *ioaddr) +void dwmac_enable_dma_transmission(void __iomem *ioaddr, u32 chan) { - writel(1, ioaddr + DMA_XMT_POLL_DEMAND); + writel(1, ioaddr + DMA_CHAN_XMT_POLL_DEMAND(chan)); } void dwmac_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - u32 value = readl(ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); if (rx) value |= DMA_INTR_DEFAULT_RX; if (tx) value |= DMA_INTR_DEFAULT_TX; - writel(value, ioaddr + DMA_INTR_ENA); + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } void dwmac_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - u32 value = readl(ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); if (rx) value &= ~DMA_INTR_DEFAULT_RX; if (tx) value &= ~DMA_INTR_DEFAULT_TX; - writel(value, ioaddr + DMA_INTR_ENA); + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } void dwmac_dma_start_tx(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan) { - u32 value = readl(ioaddr + DMA_CONTROL); + u32 value = readl(ioaddr + DMA_CHAN_CONTROL(chan)); value |= DMA_CONTROL_ST; - writel(value, ioaddr + DMA_CONTROL); + writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); } void dwmac_dma_stop_tx(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan) { - u32 value = readl(ioaddr + DMA_CONTROL); + u32 value = readl(ioaddr + DMA_CHAN_CONTROL(chan)); value &= ~DMA_CONTROL_ST; - writel(value, ioaddr + DMA_CONTROL); + writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); } void dwmac_dma_start_rx(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan) { - u32 value = readl(ioaddr + DMA_CONTROL); + u32 value = readl(ioaddr + DMA_CHAN_CONTROL(chan)); value |= DMA_CONTROL_SR; - writel(value, ioaddr + DMA_CONTROL); + writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); } void dwmac_dma_stop_rx(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan) { - u32 value = readl(ioaddr + DMA_CONTROL); + u32 value = readl(ioaddr + DMA_CHAN_CONTROL(chan)); value &= ~DMA_CONTROL_SR; - writel(value, ioaddr + DMA_CONTROL); + writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); } #ifdef DWMAC_DMA_DEBUG @@ -165,7 +165,7 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); int ret = 0; /* read the status register (CSR5) */ - u32 intr_status = readl(ioaddr + DMA_STATUS); + u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan)); #ifdef DWMAC_DMA_DEBUG 
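The new DMA_CHAN_* accessors reuse the legacy single-channel offsets and add a fixed 0x100 per-channel stride, so channel 0 stays register-compatible with the old layout. Worked addresses, using the DMA_INTR_ENA offset (0x101c) and DMA_CHAN_BASE_OFFSET from the hunks above:

#include <linux/types.h>

/* stride math behind the DMA_CHAN_* macros above */
static inline u32 dma_chan_base_addr(u32 base, u32 chan)
{
	return base + chan * 0x100;	/* DMA_CHAN_BASE_OFFSET */
}

/* dma_chan_base_addr(0x101c, 0) == 0x101c: channel 0 lands on the
 * legacy DMA_INTR_ENA offset, so single-channel parts keep
 * programming exactly the registers they always did.
 * dma_chan_base_addr(0x101c, 2) == 0x121c: channel 2's copy. */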
/* Enable it to monitor DMA rx/tx status in case of critical problems */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index f196cd99d510..f519d43738b0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -846,42 +846,41 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; -#define DPP_RX_ERR "Read Rx Descriptor Parity checker Error" -#define DPP_TX_ERR "Read Tx Descriptor Parity checker Error" - +static const char dpp_rx_err[] = "Read Rx Descriptor Parity checker Error"; +static const char dpp_tx_err[] = "Read Tx Descriptor Parity checker Error"; static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { - { true, "TDPES0", DPP_TX_ERR }, - { true, "TDPES1", DPP_TX_ERR }, - { true, "TDPES2", DPP_TX_ERR }, - { true, "TDPES3", DPP_TX_ERR }, - { true, "TDPES4", DPP_TX_ERR }, - { true, "TDPES5", DPP_TX_ERR }, - { true, "TDPES6", DPP_TX_ERR }, - { true, "TDPES7", DPP_TX_ERR }, - { true, "TDPES8", DPP_TX_ERR }, - { true, "TDPES9", DPP_TX_ERR }, - { true, "TDPES10", DPP_TX_ERR }, - { true, "TDPES11", DPP_TX_ERR }, - { true, "TDPES12", DPP_TX_ERR }, - { true, "TDPES13", DPP_TX_ERR }, - { true, "TDPES14", DPP_TX_ERR }, - { true, "TDPES15", DPP_TX_ERR }, - { true, "RDPES0", DPP_RX_ERR }, - { true, "RDPES1", DPP_RX_ERR }, - { true, "RDPES2", DPP_RX_ERR }, - { true, "RDPES3", DPP_RX_ERR }, - { true, "RDPES4", DPP_RX_ERR }, - { true, "RDPES5", DPP_RX_ERR }, - { true, "RDPES6", DPP_RX_ERR }, - { true, "RDPES7", DPP_RX_ERR }, - { true, "RDPES8", DPP_RX_ERR }, - { true, "RDPES9", DPP_RX_ERR }, - { true, "RDPES10", DPP_RX_ERR }, - { true, "RDPES11", DPP_RX_ERR }, - { true, "RDPES12", DPP_RX_ERR }, - { true, "RDPES13", DPP_RX_ERR }, - { true, "RDPES14", DPP_RX_ERR }, - { true, "RDPES15", DPP_RX_ERR }, + { true, "TDPES0", dpp_tx_err }, + { true, "TDPES1", dpp_tx_err }, + { true, "TDPES2", dpp_tx_err }, + { true, "TDPES3", dpp_tx_err }, + { true, "TDPES4", dpp_tx_err }, + { true, "TDPES5", dpp_tx_err }, + { true, "TDPES6", dpp_tx_err }, + { true, "TDPES7", dpp_tx_err }, + { true, "TDPES8", dpp_tx_err }, + { true, "TDPES9", dpp_tx_err }, + { true, "TDPES10", dpp_tx_err }, + { true, "TDPES11", dpp_tx_err }, + { true, "TDPES12", dpp_tx_err }, + { true, "TDPES13", dpp_tx_err }, + { true, "TDPES14", dpp_tx_err }, + { true, "TDPES15", dpp_tx_err }, + { true, "RDPES0", dpp_rx_err }, + { true, "RDPES1", dpp_rx_err }, + { true, "RDPES2", dpp_rx_err }, + { true, "RDPES3", dpp_rx_err }, + { true, "RDPES4", dpp_rx_err }, + { true, "RDPES5", dpp_rx_err }, + { true, "RDPES6", dpp_rx_err }, + { true, "RDPES7", dpp_rx_err }, + { true, "RDPES8", dpp_rx_err }, + { true, "RDPES9", dpp_rx_err }, + { true, "RDPES10", dpp_rx_err }, + { true, "RDPES11", dpp_rx_err }, + { true, "RDPES12", dpp_rx_err }, + { true, "RDPES13", dpp_rx_err }, + { true, "RDPES14", dpp_rx_err }, + { true, "RDPES15", dpp_rx_err }, }; static void dwxgmac3_handle_dma_err(struct net_device *ndev, @@ -1505,13 +1504,14 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + XGMAC_RX_CONFIG); } -static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, - u32 num_txq, - u32 num_rxq, bool enable) +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, + bool tx_enable, bool pmac_enable) { u32 value; - if (!enable) 
{ + if (!tx_enable) { value = readl(ioaddr + XGMAC_FPE_CTRL_STS); value &= ~XGMAC_EFPE; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index fc82862a612c..389aad7b5c1e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -56,10 +56,12 @@ static void dwxgmac2_set_tx_owner(struct dma_desc *p) static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic) { - p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN); + u32 flags = XGMAC_RDES3_OWN; if (!disable_rx_ic) - p->des3 |= cpu_to_le32(XGMAC_RDES3_IOC); + flags |= XGMAC_RDES3_IOC; + + p->des3 |= cpu_to_le32(flags); } static int dwxgmac2_get_tx_ls(struct dma_desc *p) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index dd2ab6185c40..7840bc403788 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -20,7 +20,7 @@ static int dwxgmac2_dma_reset(void __iomem *ioaddr) } static void dwxgmac2_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, int atds) + struct stmmac_dma_cfg *dma_cfg) { u32 value = readl(ioaddr + XGMAC_DMA_SYSBUS_MODE); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 29367105df54..88cce28b2f98 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -171,7 +171,7 @@ static const struct stmmac_hwif_entry { .mac = &dwmac4_ops, .hwtimestamp = &stmmac_ptp, .mode = NULL, - .tc = &dwmac510_tc_ops, + .tc = &dwmac4_tc_ops, .mmc = &dwmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwmac4_setup, @@ -252,7 +252,7 @@ static const struct stmmac_hwif_entry { .mac = &dwxgmac210_ops, .hwtimestamp = &stmmac_ptp, .mode = NULL, - .tc = &dwmac510_tc_ops, + .tc = &dwxgmac_tc_ops, .mmc = &dwxgmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwxgmac2_setup, @@ -273,7 +273,7 @@ static const struct stmmac_hwif_entry { .mac = &dwxlgmac2_ops, .hwtimestamp = &stmmac_ptp, .mode = NULL, - .tc = &dwmac510_tc_ops, + .tc = &dwxgmac_tc_ops, .mmc = &dwxgmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwxlgmac2_setup, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index e53c32362774..d5a9f01ecac5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -7,6 +7,7 @@ #include <linux/netdevice.h> #include <linux/stmmac.h> +#include <net/pkt_cls.h> #define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) 
\ ({ \ @@ -28,6 +29,8 @@ struct stmmac_extra_stats; struct stmmac_priv; struct stmmac_safety_stats; +struct stmmac_fpe_cfg; +enum stmmac_mpacket_type; struct dma_desc; struct dma_extended_desc; struct dma_edesc; @@ -175,8 +178,7 @@ struct dma_features; struct stmmac_dma_ops { /* DMA core initialization */ int (*reset)(void __iomem *ioaddr); - void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - int atds); + void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg); void (*init_chan)(struct stmmac_priv *priv, void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, u32 chan); void (*init_rx_chan)(struct stmmac_priv *priv, void __iomem *ioaddr, @@ -198,7 +200,7 @@ struct stmmac_dma_ops { /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr)(struct stmmac_extra_stats *x, void __iomem *ioaddr); - void (*enable_dma_transmission) (void __iomem *ioaddr); + void (*enable_dma_transmission)(void __iomem *ioaddr, u32 chan); void (*enable_dma_irq)(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan, bool rx, bool tx); void (*disable_dma_irq)(struct stmmac_priv *priv, void __iomem *ioaddr, @@ -420,11 +422,16 @@ struct stmmac_ops { void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); void (*fpe_configure)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, u32 num_txq, u32 num_rxq, - bool enable); + bool tx_enable, bool pmac_enable); void (*fpe_send_mpacket)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev); + int (*fpe_get_add_frag_size)(const void __iomem *ioaddr); + void (*fpe_set_add_frag_size)(void __iomem *ioaddr, u32 add_frag_size); + int (*fpe_map_preemption_class)(struct net_device *ndev, + struct netlink_ext_ack *extack, + u32 pclass); }; #define stmmac_core_init(__priv, __args...) \ @@ -529,6 +536,12 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, fpe_send_mpacket, __args) #define stmmac_fpe_irq_status(__priv, __args...) \ stmmac_do_callback(__priv, mac, fpe_irq_status, __args) +#define stmmac_fpe_get_add_frag_size(__priv, __args...) \ + stmmac_do_callback(__priv, mac, fpe_get_add_frag_size, __args) +#define stmmac_fpe_set_add_frag_size(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_set_add_frag_size, __args) +#define stmmac_fpe_map_preemption_class(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_map_preemption_class, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -616,6 +629,8 @@ struct stmmac_tc_ops { struct tc_etf_qopt_offload *qopt); int (*query_caps)(struct stmmac_priv *priv, struct tc_query_caps_base *base); + int (*setup_mqprio)(struct stmmac_priv *priv, + struct tc_mqprio_qopt_offload *qopt); }; #define stmmac_tc_init(__priv, __args...) \ @@ -632,6 +647,8 @@ struct stmmac_tc_ops { stmmac_do_callback(__priv, tc, setup_etf, __args) #define stmmac_tc_query_caps(__priv, __args...) \ stmmac_do_callback(__priv, tc, query_caps, __args) +#define stmmac_tc_setup_mqprio(__priv, __args...) 
\ + stmmac_do_callback(__priv, tc, setup_mqprio, __args) struct stmmac_counters; @@ -675,7 +692,9 @@ extern const struct stmmac_dma_ops dwmac4_dma_ops; extern const struct stmmac_ops dwmac410_ops; extern const struct stmmac_dma_ops dwmac410_dma_ops; extern const struct stmmac_ops dwmac510_ops; +extern const struct stmmac_tc_ops dwmac4_tc_ops; extern const struct stmmac_tc_ops dwmac510_tc_ops; +extern const struct stmmac_tc_ops dwxgmac_tc_ops; extern const struct stmmac_ops dwxgmac210_ops; extern const struct stmmac_ops dwxlgmac2_ops; extern const struct stmmac_dma_ops dwxgmac210_dma_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index b23b920eedb1..ea135203ff2e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -146,6 +146,32 @@ struct stmmac_channel { u32 index; }; +/* FPE link-partner hand-shaking mPacket type */ +enum stmmac_mpacket_type { + MPACKET_VERIFY = 0, + MPACKET_RESPONSE = 1, +}; + +#define STMMAC_FPE_MM_MAX_VERIFY_RETRIES 3 +#define STMMAC_FPE_MM_MAX_VERIFY_TIME_MS 128 + +struct stmmac_fpe_cfg { + /* Serialize access to MAC Merge state between ethtool requests + * and link state updates. + */ + spinlock_t lock; + + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ + + enum ethtool_mm_verify_status status; + struct timer_list verify_timer; + bool verify_enabled; + int verify_retries; + bool pmac_enabled; + u32 verify_time; + bool tx_enabled; +}; + struct stmmac_tc_entry { bool in_use; bool in_hw; @@ -339,11 +365,8 @@ struct stmmac_priv { struct workqueue_struct *wq; struct work_struct service_task; - /* Workqueue for handling FPE hand-shaking */ - unsigned long fpe_task_state; - struct workqueue_struct *fpe_wq; - struct work_struct fpe_task; - char wq_name[IFNAMSIZ + 4]; + /* Frame Preemption feature (FPE) */ + struct stmmac_fpe_cfg fpe_cfg; /* TC Handling */ unsigned int tc_entries_max; @@ -397,7 +420,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv); int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt); int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size); int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled); -void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable); +void stmmac_fpe_apply(struct stmmac_priv *priv); static inline bool stmmac_xdp_is_enabled(struct stmmac_priv *priv) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 7008219fd88d..2a37592a6281 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -19,6 +19,7 @@ #include "stmmac.h" #include "dwmac_dma.h" #include "dwxgmac2.h" +#include "dwmac5.h" #define REG_SPACE_SIZE 0x1060 #define GMAC4_REG_SPACE_SIZE 0x116C @@ -438,13 +439,6 @@ static void stmmac_ethtool_setmsglevel(struct net_device *dev, u32 level) } -static int stmmac_check_if_running(struct net_device *dev) -{ - if (!netif_running(dev)) - return -EBUSY; - return 0; -} - static int stmmac_ethtool_get_regs_len(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); @@ -1207,13 +1201,13 @@ static int stmmac_get_ts_info(struct net_device *dev, info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (priv->ptp_clock) info->phc_index = 
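struct stmmac_fpe_cfg above is shared between ethtool requests, phylink link updates and the MAC interrupt, hence the embedded spinlock. A sketch of the resulting lock discipline, assuming the driver's "stmmac.h"; both helpers are illustrative, not functions from the patch:

/* process context (ethtool/phylink): the FPE IRQ can take the same
 * lock on this CPU, so local interrupts must be disabled */
static void fpe_cfg_update_from_process(struct stmmac_priv *priv, bool tx_on)
{
	unsigned long flags;

	spin_lock_irqsave(&priv->fpe_cfg.lock, flags);
	priv->fpe_cfg.tx_enabled = tx_on;
	spin_unlock_irqrestore(&priv->fpe_cfg.lock, flags);
}

/* hard-IRQ context: interrupts are already off, a plain lock suffices */
static void fpe_cfg_update_from_irq(struct stmmac_priv *priv)
{
	spin_lock(&priv->fpe_cfg.lock);
	priv->fpe_cfg.status = ETHTOOL_MM_VERIFY_STATUS_VERIFYING;
	spin_unlock(&priv->fpe_cfg.lock);
}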
ptp_clock_index(priv->ptp_clock); + else + info->phc_index = 0; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); @@ -1270,10 +1264,101 @@ static int stmmac_set_tunable(struct net_device *dev, return ret; } +static int stmmac_get_mm(struct net_device *ndev, + struct ethtool_mm_state *state) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned long flags; + u32 frag_size; + + if (!priv->dma_cap.fpesel) + return -EOPNOTSUPP; + + spin_lock_irqsave(&priv->fpe_cfg.lock, flags); + + state->max_verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS; + state->verify_enabled = priv->fpe_cfg.verify_enabled; + state->pmac_enabled = priv->fpe_cfg.pmac_enabled; + state->verify_time = priv->fpe_cfg.verify_time; + state->tx_enabled = priv->fpe_cfg.tx_enabled; + state->verify_status = priv->fpe_cfg.status; + state->rx_min_frag_size = ETH_ZLEN; + + /* FPE active if common tx_enabled and + * (verification success or disabled(forced)) + */ + if (state->tx_enabled && + (state->verify_status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED || + state->verify_status == ETHTOOL_MM_VERIFY_STATUS_DISABLED)) + state->tx_active = true; + else + state->tx_active = false; + + frag_size = stmmac_fpe_get_add_frag_size(priv, priv->ioaddr); + state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(frag_size); + + spin_unlock_irqrestore(&priv->fpe_cfg.lock, flags); + + return 0; +} + +static int stmmac_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; + unsigned long flags; + u32 frag_size; + int err; + + err = ethtool_mm_frag_size_min_to_add(cfg->tx_min_frag_size, + &frag_size, extack); + if (err) + return err; + + /* Wait for the verification that's currently in progress to finish */ + timer_shutdown_sync(&fpe_cfg->verify_timer); + + spin_lock_irqsave(&fpe_cfg->lock, flags); + + fpe_cfg->verify_enabled = cfg->verify_enabled; + fpe_cfg->pmac_enabled = cfg->pmac_enabled; + fpe_cfg->verify_time = cfg->verify_time; + fpe_cfg->tx_enabled = cfg->tx_enabled; + + if (!cfg->verify_enabled) + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; + + stmmac_fpe_set_add_frag_size(priv, priv->ioaddr, frag_size); + stmmac_fpe_apply(priv); + + spin_unlock_irqrestore(&fpe_cfg->lock, flags); + + return 0; +} + +static void stmmac_get_mm_stats(struct net_device *ndev, + struct ethtool_mm_stats *s) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_counters *mmc = &priv->mmc; + + if (!priv->dma_cap.rmon) + return; + + stmmac_mmc_read(priv, priv->mmcaddr, mmc); + + s->MACMergeFrameAssErrorCount = mmc->mmc_rx_packet_assembly_err_cntr; + s->MACMergeFrameAssOkCount = mmc->mmc_rx_packet_assembly_ok_cntr; + s->MACMergeFrameSmdErrorCount = mmc->mmc_rx_packet_smd_err_cntr; + s->MACMergeFragCountRx = mmc->mmc_rx_fpe_fragment_cntr; + s->MACMergeFragCountTx = mmc->mmc_tx_fpe_fragment_cntr; + s->MACMergeHoldCount = mmc->mmc_tx_hold_req_cntr; +} + static const struct ethtool_ops stmmac_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, - .begin = stmmac_check_if_running, .get_drvinfo = stmmac_ethtool_getdrvinfo, .get_msglevel = stmmac_ethtool_getmsglevel, .set_msglevel = stmmac_ethtool_setmsglevel, @@ -1309,6 +1394,9 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .set_tunable = stmmac_set_tunable, .get_link_ksettings = stmmac_ethtool_get_link_ksettings, .set_link_ksettings = stmmac_ethtool_set_link_ksettings, + .get_mm = 
stmmac_get_mm, + .set_mm = stmmac_set_mm, + .get_mm_stats = stmmac_get_mm_stats, }; void stmmac_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f3a1b179aaea..e2140482270a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -968,18 +968,31 @@ static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up) { - struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; - enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; - enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; - bool *hs_enable = &fpe_cfg->hs_enable; + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; + unsigned long flags; - if (is_up && *hs_enable) { - stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg, - MPACKET_VERIFY); + timer_shutdown_sync(&fpe_cfg->verify_timer); + + spin_lock_irqsave(&fpe_cfg->lock, flags); + + if (is_up && fpe_cfg->pmac_enabled) { + /* VERIFY process requires pmac enabled when NIC comes up */ + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + false, true); + + /* New link => maybe new partner => new verification process */ + stmmac_fpe_apply(priv); } else { - *lo_state = FPE_STATE_OFF; - *lp_state = FPE_STATE_OFF; + /* No link => turn off EFPE */ + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + false, false); } + + spin_unlock_irqrestore(&fpe_cfg->lock, flags); } static void stmmac_mac_link_down(struct phylink_config *config, @@ -2022,7 +2035,7 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, rx_q->queue_index = queue; rx_q->priv_data = priv; - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.flags = PP_FLAG_DMA_MAP | (xdp_prog ? 
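stmmac_get_mm() above derives tx_active from the administrative state plus the verification outcome. The condition, isolated (mm_tx_active() is an illustrative name):

#include <linux/ethtool.h>

/* tx_active as reported by .get_mm above: preemption is only
 * operational when TX is administratively on and verification either
 * succeeded or was deliberately turned off (forced mode). */
static bool mm_tx_active(bool tx_enabled,
			 enum ethtool_mm_verify_status status)
{
	return tx_enabled &&
	       (status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED ||
		status == ETHTOOL_MM_VERIFY_STATUS_DISABLED);
}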
PP_FLAG_DMA_SYNC_DEV : 0); pp_params.pool_size = dma_conf->dma_rx_size; num_pages = DIV_ROUND_UP(dma_conf->dma_buf_sz, PAGE_SIZE); pp_params.order = ilog2(num_pages); @@ -2367,9 +2380,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) if (txfifosz == 0) txfifosz = priv->dma_cap.tx_fifo_size; - /* Adjust for real per queue fifo size */ - rxfifosz /= rx_channels_count; - txfifosz /= tx_channels_count; + /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ + if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; + } if (priv->plat->force_thresh_dma_mode) { txmode = tc; @@ -2553,7 +2568,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) true, priv->mode, true, true, xdp_desc.len); - stmmac_enable_dma_transmission(priv, priv->ioaddr); + stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); xsk_tx_metadata_to_compl(meta, &tx_q->tx_skbuff_dma[entry].xsk_meta); @@ -3003,7 +3018,6 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) struct stmmac_rx_queue *rx_q; struct stmmac_tx_queue *tx_q; u32 chan = 0; - int atds = 0; int ret = 0; if (!priv->plat->dma_cfg || !priv->plat->dma_cfg->pbl) { @@ -3012,7 +3026,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) } if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE)) - atds = 1; + priv->plat->dma_cfg->atds = 1; ret = stmmac_reset(priv, priv->ioaddr); if (ret) { @@ -3021,7 +3035,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) } /* DMA Configuration */ - stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, atds); + stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg); if (priv->plat->axi) stmmac_axi(priv, priv->ioaddr, priv->plat->axi); @@ -3357,27 +3371,6 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) } } -static int stmmac_fpe_start_wq(struct stmmac_priv *priv) -{ - char *name; - - clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state); - clear_bit(__FPE_REMOVING, &priv->fpe_task_state); - - name = priv->wq_name; - sprintf(name, "%s-fpe", priv->dev->name); - - priv->fpe_wq = create_singlethread_workqueue(name); - if (!priv->fpe_wq) { - netdev_err(priv->dev, "%s: Failed to create workqueue\n", name); - - return -ENOMEM; - } - netdev_info(priv->dev, "FPE workqueue start"); - - return 0; -} - /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. @@ -3532,13 +3525,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) stmmac_set_hw_vlan_mode(priv, priv->hw); - if (priv->dma_cap.fpesel) { - stmmac_fpe_start_wq(priv); - - if (priv->plat->fpe_cfg->enable) - stmmac_fpe_handshake(priv, true); - } - return 0; } @@ -4035,18 +4021,6 @@ static int stmmac_open(struct net_device *dev) return ret; } -static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) -{ - set_bit(__FPE_REMOVING, &priv->fpe_task_state); - - if (priv->fpe_wq) { - destroy_workqueue(priv->fpe_wq); - priv->fpe_wq = NULL; - } - - netdev_info(priv->dev, "FPE workqueue stop"); -} - /** * stmmac_release - close entry point of the driver * @dev : device pointer. 
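stmmac_fpe_link_state_handle() above resets preemption on every link transition: EFPE always drops, the pMAC receive side stays up only when the link is up and the user enabled it, and verification restarts toward the (possibly new) partner. A condensed sketch under those assumptions, using the driver's "stmmac.h"/"hwif.h" helpers; the real handler also quiesces the verify timer before touching the state:

static void fpe_link_change(struct stmmac_priv *priv, bool link_up)
{
	bool pmac = link_up && priv->fpe_cfg.pmac_enabled;

	/* a link transition always drops TX preemption (EFPE) */
	stmmac_fpe_configure(priv, priv->ioaddr, &priv->fpe_cfg,
			     priv->plat->tx_queues_to_use,
			     priv->plat->rx_queues_to_use,
			     false, pmac);

	if (pmac)
		stmmac_fpe_apply(priv);	/* new partner: verify again */
}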
@@ -4094,10 +4068,10 @@ static int stmmac_release(struct net_device *dev) stmmac_release_ptp(priv); - pm_runtime_put(priv->device); - if (priv->dma_cap.fpesel) - stmmac_fpe_stop_wq(priv); + timer_shutdown_sync(&priv->fpe_cfg.verify_timer); + + pm_runtime_put(priv->device); return 0; } @@ -4754,7 +4728,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); - stmmac_enable_dma_transmission(priv, priv->ioaddr); + stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); stmmac_flush_tx_descriptors(priv, queue); stmmac_tx_timer_arm(priv, queue); @@ -4981,7 +4955,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, u64_stats_update_end(&txq_stats->q_syncp); } - stmmac_enable_dma_transmission(priv, priv->ioaddr); + stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); tx_q->cur_tx = entry; @@ -5981,45 +5955,31 @@ static int stmmac_set_features(struct net_device *netdev, static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) { - struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; - enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; - enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; - bool *hs_enable = &fpe_cfg->hs_enable; + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; - if (status == FPE_EVENT_UNKNOWN || !*hs_enable) - return; + /* This is interrupt context, just spin_lock() */ + spin_lock(&fpe_cfg->lock); - /* If LP has sent verify mPacket, LP is FPE capable */ - if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) { - if (*lp_state < FPE_STATE_CAPABLE) - *lp_state = FPE_STATE_CAPABLE; + if (!fpe_cfg->pmac_enabled || status == FPE_EVENT_UNKNOWN) + goto unlock_out; - /* If user has requested FPE enable, quickly response */ - if (*hs_enable) - stmmac_fpe_send_mpacket(priv, priv->ioaddr, - fpe_cfg, - MPACKET_RESPONSE); - } - - /* If Local has sent verify mPacket, Local is FPE capable */ - if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER) { - if (*lo_state < FPE_STATE_CAPABLE) - *lo_state = FPE_STATE_CAPABLE; - } + /* LP has sent verify mPacket */ + if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) + stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg, + MPACKET_RESPONSE); - /* If LP has sent response mPacket, LP is entering FPE ON */ - if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP) - *lp_state = FPE_STATE_ENTERING_ON; + /* Local has sent verify mPacket */ + if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER && + fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_VERIFYING; - /* If Local has sent response mPacket, Local is entering FPE ON */ - if ((status & FPE_EVENT_TRSP) == FPE_EVENT_TRSP) - *lo_state = FPE_STATE_ENTERING_ON; + /* LP has sent response mPacket */ + if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP && + fpe_cfg->status == ETHTOOL_MM_VERIFY_STATUS_VERIFYING) + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED; - if (!test_bit(__FPE_REMOVING, &priv->fpe_task_state) && - !test_and_set_bit(__FPE_TASK_SCHED, &priv->fpe_task_state) && - priv->fpe_wq) { - queue_work(priv->fpe_wq, &priv->fpe_task); - } +unlock_out: + spin_unlock(&fpe_cfg->lock); } static void stmmac_common_interrupt(struct stmmac_priv *priv) @@ -6256,6 +6216,8 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, switch (type) { case TC_QUERY_CAPS: return stmmac_tc_query_caps(priv, priv, type_data); + case TC_SETUP_QDISC_MQPRIO: + return 
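stmmac_fpe_event_status() above is the IRQ-side half of the 802.3 verification exchange. A condensed restatement with the same FPE_EVENT_* bits; fpe_handle_events() is an illustrative name, and the caller is assumed to hold fpe_cfg->lock:

static void fpe_handle_events(struct stmmac_priv *priv,
			      struct stmmac_fpe_cfg *cfg, int status)
{
	if (status & FPE_EVENT_RVER)		/* partner probes us: answer */
		stmmac_fpe_send_mpacket(priv, priv->ioaddr, cfg,
					MPACKET_RESPONSE);

	if ((status & FPE_EVENT_TVER) &&	/* our probe left the wire */
	    cfg->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED)
		cfg->status = ETHTOOL_MM_VERIFY_STATUS_VERIFYING;

	if ((status & FPE_EVENT_RRSP) &&	/* partner answered: verified */
	    cfg->status == ETHTOOL_MM_VERIFY_STATUS_VERIFYING)
		cfg->status = ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED;
}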
stmmac_tc_setup_mqprio(priv, priv, type_data); case TC_SETUP_BLOCK: return flow_block_cb_setup_simple(type_data, &stmmac_block_cb_list, @@ -7375,68 +7337,87 @@ int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size) return ret; } -#define SEND_VERIFY_MPAKCET_FMT "Send Verify mPacket lo_state=%d lp_state=%d\n" -static void stmmac_fpe_lp_task(struct work_struct *work) +/** + * stmmac_fpe_verify_timer - Timer for MAC Merge verification + * @t: timer_list struct containing private info + * + * Verify the MAC Merge capability in the local TX direction, by + * transmitting Verify mPackets up to 3 times. Wait until link + * partner responds with a Response mPacket, otherwise fail. + */ +static void stmmac_fpe_verify_timer(struct timer_list *t) { - struct stmmac_priv *priv = container_of(work, struct stmmac_priv, - fpe_task); - struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; - enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; - enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; - bool *hs_enable = &fpe_cfg->hs_enable; - bool *enable = &fpe_cfg->enable; - int retries = 20; - - while (retries-- > 0) { - /* Bail out immediately if FPE handshake is OFF */ - if (*lo_state == FPE_STATE_OFF || !*hs_enable) - break; - - if (*lo_state == FPE_STATE_ENTERING_ON && - *lp_state == FPE_STATE_ENTERING_ON) { - stmmac_fpe_configure(priv, priv->ioaddr, - fpe_cfg, - priv->plat->tx_queues_to_use, - priv->plat->rx_queues_to_use, - *enable); + struct stmmac_fpe_cfg *fpe_cfg = from_timer(fpe_cfg, t, verify_timer); + struct stmmac_priv *priv = container_of(fpe_cfg, struct stmmac_priv, + fpe_cfg); + unsigned long flags; + bool rearm = false; - netdev_info(priv->dev, "configured FPE\n"); + spin_lock_irqsave(&fpe_cfg->lock, flags); - *lo_state = FPE_STATE_ON; - *lp_state = FPE_STATE_ON; - netdev_info(priv->dev, "!!! 
BOTH FPE stations ON\n"); - break; - } - - if ((*lo_state == FPE_STATE_CAPABLE || - *lo_state == FPE_STATE_ENTERING_ON) && - *lp_state != FPE_STATE_ON) { - netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT, - *lo_state, *lp_state); + switch (fpe_cfg->status) { + case ETHTOOL_MM_VERIFY_STATUS_INITIAL: + case ETHTOOL_MM_VERIFY_STATUS_VERIFYING: + if (fpe_cfg->verify_retries != 0) { stmmac_fpe_send_mpacket(priv, priv->ioaddr, - fpe_cfg, - MPACKET_VERIFY); + fpe_cfg, MPACKET_VERIFY); + rearm = true; + } else { + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_FAILED; } - /* Sleep then retry */ - msleep(500); + + fpe_cfg->verify_retries--; + break; + + case ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED: + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + true, true); + break; + + default: + break; + } + + if (rearm) { + mod_timer(&fpe_cfg->verify_timer, + jiffies + msecs_to_jiffies(fpe_cfg->verify_time)); } - clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state); + spin_unlock_irqrestore(&fpe_cfg->lock, flags); } -void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable) +static void stmmac_fpe_verify_timer_arm(struct stmmac_fpe_cfg *fpe_cfg) { - if (priv->plat->fpe_cfg->hs_enable != enable) { - if (enable) { - stmmac_fpe_send_mpacket(priv, priv->ioaddr, - priv->plat->fpe_cfg, - MPACKET_VERIFY); - } else { - priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF; - priv->plat->fpe_cfg->lp_fpe_state = FPE_STATE_OFF; - } + if (fpe_cfg->pmac_enabled && fpe_cfg->tx_enabled && + fpe_cfg->verify_enabled && + fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_FAILED && + fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) { + timer_setup(&fpe_cfg->verify_timer, stmmac_fpe_verify_timer, 0); + mod_timer(&fpe_cfg->verify_timer, jiffies); + } +} - priv->plat->fpe_cfg->hs_enable = enable; +void stmmac_fpe_apply(struct stmmac_priv *priv) +{ + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; + + /* If verification is disabled, configure FPE right away. + * Otherwise let the timer code do it. + */ + if (!fpe_cfg->verify_enabled) { + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + fpe_cfg->tx_enabled, + fpe_cfg->pmac_enabled); + } else { + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_INITIAL; + fpe_cfg->verify_retries = STMMAC_FPE_MM_MAX_VERIFY_RETRIES; + + if (netif_running(priv->dev)) + stmmac_fpe_verify_timer_arm(fpe_cfg); } } @@ -7554,9 +7535,6 @@ int stmmac_dvr_probe(struct device *device, INIT_WORK(&priv->service_task, stmmac_service_task); - /* Initialize Link Partner FPE workqueue */ - INIT_WORK(&priv->fpe_task, stmmac_fpe_lp_task); - /* Override with kernel parameters if supplied XXX CRS XXX * this needs to have multiple instances */ @@ -7721,6 +7699,12 @@ int stmmac_dvr_probe(struct device *device, mutex_init(&priv->lock); + priv->fpe_cfg.verify_retries = STMMAC_FPE_MM_MAX_VERIFY_RETRIES; + priv->fpe_cfg.verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS; + priv->fpe_cfg.status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; + timer_setup(&priv->fpe_cfg.verify_timer, stmmac_fpe_verify_timer, 0); + spin_lock_init(&priv->fpe_cfg.lock); + /* If a specific clk_csr value is passed from the platform * this means that the CSR Clock Range selection cannot be * changed at run-time and it is fixed. 
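stmmac_fpe_apply() above either programs FPE directly (verification disabled) or arms the verify timer, which sends one Verify mPacket per verify_time milliseconds for at most STMMAC_FPE_MM_MAX_VERIFY_RETRIES attempts before latching FAILED. A sketch of the arming step, assuming the driver's "stmmac.h"; fpe_verify_kick() is an illustrative name:

static void fpe_verify_kick(struct stmmac_fpe_cfg *cfg)
{
	cfg->status = ETHTOOL_MM_VERIFY_STATUS_INITIAL;
	cfg->verify_retries = STMMAC_FPE_MM_MAX_VERIFY_RETRIES;
	mod_timer(&cfg->verify_timer, jiffies);	/* first probe right away */
}

With the defaults above (three retries, 128 ms verify_time) a failed exchange is declared within roughly 3 * 128 ms; userspace tunes both knobs through the ethtool MAC Merge (set_mm) path added earlier in this series.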
Viceversa the driver'll try to @@ -7894,16 +7878,8 @@ int stmmac_suspend(struct device *dev) } rtnl_unlock(); - if (priv->dma_cap.fpesel) { - /* Disable FPE */ - stmmac_fpe_configure(priv, priv->ioaddr, - priv->plat->fpe_cfg, - priv->plat->tx_queues_to_use, - priv->plat->rx_queues_to_use, false); - - stmmac_fpe_handshake(priv, false); - stmmac_fpe_stop_wq(priv); - } + if (priv->dma_cap.fpesel) + timer_shutdown_sync(&priv->fpe_cfg.verify_timer); priv->speed = SPEED_UNKNOWN; return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 996f2bcd07a2..75ad2da1a37f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -282,16 +282,6 @@ static int tc_init(struct stmmac_priv *priv) if (ret) return -ENOMEM; - if (!priv->plat->fpe_cfg) { - priv->plat->fpe_cfg = devm_kzalloc(priv->device, - sizeof(*priv->plat->fpe_cfg), - GFP_KERNEL); - if (!priv->plat->fpe_cfg) - return -ENOMEM; - } else { - memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg)); - } - /* Fail silently as we can still use remaining features, e.g. CBS */ if (!dma_cap->frpsel) return 0; @@ -396,6 +386,7 @@ static int tc_setup_cbs(struct stmmac_priv *priv, return ret; priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB; + return 0; } /* Final adjustments for HW */ @@ -941,9 +932,9 @@ static int tc_taprio_configure(struct stmmac_priv *priv, struct tc_taprio_qopt_offload *qopt) { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; + struct netlink_ext_ack *extack = qopt->mqprio.extack; struct timespec64 time, current_time, qopt_time; ktime_t current_time_ns; - bool fpe = false; int i, ret = 0; u64 ctr; @@ -1028,16 +1019,12 @@ static int tc_taprio_configure(struct stmmac_priv *priv, switch (qopt->entries[i].command) { case TC_TAPRIO_CMD_SET_GATES: - if (fpe) - return -EINVAL; break; case TC_TAPRIO_CMD_SET_AND_HOLD: gates |= BIT(0); - fpe = true; break; case TC_TAPRIO_CMD_SET_AND_RELEASE: gates &= ~BIT(0); - fpe = true; break; default: return -EOPNOTSUPP; @@ -1068,16 +1055,6 @@ static int tc_taprio_configure(struct stmmac_priv *priv, tc_taprio_map_maxsdu_txq(priv, qopt); - if (fpe && !priv->dma_cap.fpesel) { - mutex_unlock(&priv->est_lock); - return -EOPNOTSUPP; - } - - /* Actual FPE register configuration will be done after FPE handshake - * is success. 
- */ - priv->plat->fpe_cfg->enable = fpe; - ret = stmmac_est_configure(priv, priv, priv->est, priv->plat->clk_ptp_rate); mutex_unlock(&priv->est_lock); @@ -1086,12 +1063,10 @@ static int tc_taprio_configure(struct stmmac_priv *priv, goto disable; } - netdev_info(priv->dev, "configured EST\n"); - - if (fpe) { - stmmac_fpe_handshake(priv, true); - netdev_info(priv->dev, "start FPE handshake\n"); - } + ret = stmmac_fpe_map_preemption_class(priv, priv->dev, extack, + qopt->mqprio.preemptible_tcs); + if (ret) + goto disable; return 0; @@ -1109,16 +1084,7 @@ disable: mutex_unlock(&priv->est_lock); } - priv->plat->fpe_cfg->enable = false; - stmmac_fpe_configure(priv, priv->ioaddr, - priv->plat->fpe_cfg, - priv->plat->tx_queues_to_use, - priv->plat->rx_queues_to_use, - false); - netdev_info(priv->dev, "disabled FPE\n"); - - stmmac_fpe_handshake(priv, false); - netdev_info(priv->dev, "stop FPE handshake\n"); + stmmac_fpe_map_preemption_class(priv, priv->dev, extack, 0); return ret; } @@ -1174,6 +1140,18 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return err; } +static int tc_setup_taprio_without_fpe(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt) +{ + if (!qopt->mqprio.preemptible_tcs) + return tc_setup_taprio(priv, qopt); + + NL_SET_ERR_MSG_MOD(qopt->mqprio.extack, + "taprio with FPE is not implemented for this MAC"); + + return -EOPNOTSUPP; +} + static int tc_setup_etf(struct stmmac_priv *priv, struct tc_etf_qopt_offload *qopt) { @@ -1198,6 +1176,13 @@ static int tc_query_caps(struct stmmac_priv *priv, struct tc_query_caps_base *base) { switch (base->type) { + case TC_SETUP_QDISC_MQPRIO: { + struct tc_mqprio_caps *caps = base->caps; + + caps->validate_queue_counts = true; + + return 0; + } case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_caps *caps = base->caps; @@ -1214,6 +1199,81 @@ static int tc_query_caps(struct stmmac_priv *priv, } } +static void stmmac_reset_tc_mqprio(struct net_device *ndev, + struct netlink_ext_ack *extack) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + + netdev_reset_tc(ndev); + netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use); + stmmac_fpe_map_preemption_class(priv, ndev, extack, 0); +} + +static int tc_setup_dwmac510_mqprio(struct stmmac_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct netlink_ext_ack *extack = mqprio->extack; + struct tc_mqprio_qopt *qopt = &mqprio->qopt; + u32 offset, count, num_stack_tx_queues = 0; + struct net_device *ndev = priv->dev; + u32 num_tc = qopt->num_tc; + int err; + + if (!num_tc) { + stmmac_reset_tc_mqprio(ndev, extack); + return 0; + } + + err = netdev_set_num_tc(ndev, num_tc); + if (err) + return err; + + for (u32 tc = 0; tc < num_tc; tc++) { + offset = qopt->offset[tc]; + count = qopt->count[tc]; + num_stack_tx_queues += count; + + err = netdev_set_tc_queue(ndev, tc, count, offset); + if (err) + goto err_reset_tc; + } + + err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues); + if (err) + goto err_reset_tc; + + err = stmmac_fpe_map_preemption_class(priv, ndev, extack, + mqprio->preemptible_tcs); + if (err) + goto err_reset_tc; + + return 0; + +err_reset_tc: + stmmac_reset_tc_mqprio(ndev, extack); + + return err; +} + +static int tc_setup_mqprio_unimplemented(struct stmmac_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + NL_SET_ERR_MSG_MOD(mqprio->extack, + "mqprio HW offload is not implemented for this MAC"); + return -EOPNOTSUPP; +} + +const struct stmmac_tc_ops dwmac4_tc_ops = { + .init = tc_init, + .setup_cls_u32 = tc_setup_cls_u32, + .setup_cbs = 
tc_setup_cbs, + .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio_without_fpe, + .setup_etf = tc_setup_etf, + .query_caps = tc_query_caps, + .setup_mqprio = tc_setup_mqprio_unimplemented, +}; + const struct stmmac_tc_ops dwmac510_tc_ops = { .init = tc_init, .setup_cls_u32 = tc_setup_cls_u32, @@ -1222,4 +1282,16 @@ const struct stmmac_tc_ops dwmac510_tc_ops = { .setup_taprio = tc_setup_taprio, .setup_etf = tc_setup_etf, .query_caps = tc_query_caps, + .setup_mqprio = tc_setup_dwmac510_mqprio, +}; + +const struct stmmac_tc_ops dwxgmac_tc_ops = { + .init = tc_init, + .setup_cls_u32 = tc_setup_cls_u32, + .setup_cbs = tc_setup_cbs, + .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio_without_fpe, + .setup_etf = tc_setup_etf, + .query_caps = tc_query_caps, + .setup_mqprio = tc_setup_mqprio_unimplemented, }; diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 2f30715e9b67..1e887d951a04 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -114,37 +114,23 @@ static void vnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf) { struct vnet *vp = (struct vnet *)netdev_priv(dev); struct vnet_port *port; - char *p = (char *)buf; switch (stringset) { case ETH_SS_STATS: memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys)); - p += sizeof(ethtool_stats_keys); + buf += sizeof(ethtool_stats_keys); rcu_read_lock(); list_for_each_entry_rcu(port, &vp->port_list, list) { - snprintf(p, ETH_GSTRING_LEN, "p%u.%s-%pM", - port->q_index, port->switch_port ? "s" : "q", - port->raddr); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "p%u.rx_packets", - port->q_index); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "p%u.tx_packets", - port->q_index); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "p%u.rx_bytes", - port->q_index); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "p%u.tx_bytes", - port->q_index); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "p%u.event_up", - port->q_index); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "p%u.event_reset", - port->q_index); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&buf, "p%u.%s-%pM", port->q_index, + port->switch_port ? "s" : "q", + port->raddr); + ethtool_sprintf(&buf, "p%u.rx_packets", port->q_index); + ethtool_sprintf(&buf, "p%u.tx_packets", port->q_index); + ethtool_sprintf(&buf, "p%u.rx_bytes", port->q_index); + ethtool_sprintf(&buf, "p%u.tx_bytes", port->q_index); + ethtool_sprintf(&buf, "p%u.event_up", port->q_index); + ethtool_sprintf(&buf, "p%u.event_reset", port->q_index); } rcu_read_unlock(); break; diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index ede5f7890fb4..fc77f424f90b 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1671,7 +1671,7 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, #endif #ifdef BDX_LLTX - netif_trans_update(ndev); /* NETIF_F_LLTX driver :( */ + netif_trans_update(ndev); /* dev->lltx driver :( */ #endif ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; @@ -2019,7 +2019,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * set multicast list callback has to use priv->tx_lock. 
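The sunvnet conversion above leans on ethtool_sprintf() advancing the string cursor by one ETH_GSTRING_LEN slot per call, which removes the manual p += ETH_GSTRING_LEN bookkeeping. A minimal sketch (fill_port_strings() is an illustrative fragment):

#include <linux/ethtool.h>

/* ethtool_sprintf() fills one ETH_GSTRING_LEN-sized slot and bumps
 * the cursor itself, so each per-port stat name is one line */
static void fill_port_strings(u8 **buf, u32 q_index)
{
	ethtool_sprintf(buf, "p%u.rx_packets", q_index);
	ethtool_sprintf(buf, "p%u.tx_packets", q_index);
}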
*/ #ifdef BDX_LLTX - ndev->features |= NETIF_F_LLTX; + ndev->lltx = true; #endif /* MTU range: 60 - 16384 */ ndev->min_mtu = ETH_ZLEN; diff --git a/drivers/net/ethernet/tehuti/tehuti.h b/drivers/net/ethernet/tehuti/tehuti.h index 909e7296cecf..47a2d3e5f8ed 100644 --- a/drivers/net/ethernet/tehuti/tehuti.h +++ b/drivers/net/ethernet/tehuti/tehuti.h @@ -260,7 +260,7 @@ struct bdx_priv { int tx_update_mark; int tx_noupd; #endif - spinlock_t tx_lock; /* NETIF_F_LLTX mode */ + spinlock_t tx_lock; /* dev->lltx mode */ /* rarely used */ u8 port; diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index b60976947da5..9032444435e9 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -427,9 +427,9 @@ static void am65_cpsw_get_channels(struct net_device *ndev, { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); - ch->max_rx = AM65_CPSW_MAX_RX_QUEUES; - ch->max_tx = AM65_CPSW_MAX_TX_QUEUES; - ch->rx_count = AM65_CPSW_MAX_RX_QUEUES; + ch->max_rx = AM65_CPSW_MAX_QUEUES; + ch->max_tx = AM65_CPSW_MAX_QUEUES; + ch->rx_count = common->rx_ch_num_flows; ch->tx_count = common->tx_ch_num; } @@ -447,9 +447,8 @@ static int am65_cpsw_set_channels(struct net_device *ndev, if (common->usage_count) return -EBUSY; - am65_cpsw_nuss_remove_tx_chns(common); - - return am65_cpsw_nuss_update_tx_chns(common, chs->tx_count); + return am65_cpsw_nuss_update_tx_rx_chns(common, chs->tx_count, + chs->rx_count); } static void @@ -714,8 +713,6 @@ static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; info->phc_index = am65_cpts_phc_index(common->cpts); info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); @@ -915,80 +912,64 @@ static void am65_cpsw_get_mm_stats(struct net_device *ndev, s->MACMergeHoldCount = readl(base + AM65_CPSW_STATN_IET_TX_HOLD); } -static int am65_cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, - struct kernel_ethtool_coalesce *kernel_coal, - struct netlink_ext_ack *extack) -{ - struct am65_cpsw_common *common = am65_ndev_to_common(ndev); - struct am65_cpsw_tx_chn *tx_chn; - - tx_chn = &common->tx_chns[0]; - - coal->rx_coalesce_usecs = common->rx_pace_timeout / 1000; - coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000; - - return 0; -} - static int am65_cpsw_get_per_queue_coalesce(struct net_device *ndev, u32 queue, struct ethtool_coalesce *coal) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_rx_flow *rx_flow; struct am65_cpsw_tx_chn *tx_chn; - if (queue >= AM65_CPSW_MAX_TX_QUEUES) + if (queue >= AM65_CPSW_MAX_QUEUES) return -EINVAL; tx_chn = &common->tx_chns[queue]; - coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000; + rx_flow = &common->rx_chns.flows[queue]; + coal->rx_coalesce_usecs = rx_flow->rx_pace_timeout / 1000; + return 0; } -static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, +static int am65_cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct am65_cpsw_common *common = am65_ndev_to_common(ndev); - struct am65_cpsw_tx_chn *tx_chn; - - tx_chn = &common->tx_chns[0]; - - if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20) - return -EINVAL; - - if 
(coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) - return -EINVAL; - - common->rx_pace_timeout = coal->rx_coalesce_usecs * 1000; - tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000; - - return 0; + return am65_cpsw_get_per_queue_coalesce(ndev, 0, coal); } static int am65_cpsw_set_per_queue_coalesce(struct net_device *ndev, u32 queue, struct ethtool_coalesce *coal) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_rx_flow *rx_flow; struct am65_cpsw_tx_chn *tx_chn; - if (queue >= AM65_CPSW_MAX_TX_QUEUES) + if (queue >= AM65_CPSW_MAX_QUEUES) return -EINVAL; tx_chn = &common->tx_chns[queue]; - - if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) { - dev_info(common->dev, "defaulting to min value of 20us for tx-usecs for tx-%u\n", - queue); - coal->tx_coalesce_usecs = 20; - } + if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) + return -EINVAL; tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000; + rx_flow = &common->rx_chns.flows[queue]; + if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20) + return -EINVAL; + + rx_flow->rx_pace_timeout = coal->rx_coalesce_usecs * 1000; + return 0; } +static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + return am65_cpsw_set_per_queue_coalesce(ndev, 0, coal); +} + const struct ethtool_ops am65_cpsw_ethtool_ops_slave = { .begin = am65_cpsw_ethtool_op_begin, .complete = am65_cpsw_ethtool_op_complete, diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 81d9f21086ec..cbe99017cbfa 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -138,7 +138,7 @@ AM65_CPSW_PN_TS_CTL_RX_ANX_F_EN) #define AM65_CPSW_ALE_AGEOUT_DEFAULT 30 -/* Number of TX/RX descriptors */ +/* Number of TX/RX descriptors per channel/flow */ #define AM65_CPSW_MAX_TX_DESC 500 #define AM65_CPSW_MAX_RX_DESC 500 @@ -150,18 +150,20 @@ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) #define AM65_CPSW_DEFAULT_TX_CHNS 8 +#define AM65_CPSW_DEFAULT_RX_CHN_FLOWS 1 /* CPPI streaming packet interface */ #define AM65_CPSW_CPPI_TX_FLOW_ID 0x3FFF #define AM65_CPSW_CPPI_TX_PKT_TYPE 0x7 /* XDP */ -#define AM65_CPSW_XDP_CONSUMED 2 -#define AM65_CPSW_XDP_REDIRECT 1 +#define AM65_CPSW_XDP_CONSUMED BIT(1) +#define AM65_CPSW_XDP_REDIRECT BIT(0) #define AM65_CPSW_XDP_PASS 0 /* Include headroom compatible with both skb and xdpf */ -#define AM65_CPSW_HEADROOM (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) +#define AM65_CPSW_HEADROOM_NA (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) +#define AM65_CPSW_HEADROOM ALIGN(AM65_CPSW_HEADROOM_NA, sizeof(long)) static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave, const u8 *dev_addr) @@ -330,7 +332,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, } static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, - struct page *page) + struct page *page, u32 flow_idx) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; struct cppi5_host_desc_t *desc_rx; @@ -363,7 +365,8 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, swdata = cppi5_hdesc_get_swdata(desc_rx); *((void **)swdata) = page_address(page); - return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, 0, desc_rx, desc_dma); + return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, flow_idx, + desc_rx, desc_dma); } void am65_cpsw_nuss_set_p0_ptype(struct am65_cpsw_common 
*common) @@ -398,22 +401,27 @@ static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port); static void am65_cpsw_destroy_xdp_rxqs(struct am65_cpsw_common *common) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct am65_cpsw_rx_flow *flow; struct xdp_rxq_info *rxq; - int i; + int id, port; - for (i = 0; i < common->port_num; i++) { - if (!common->ports[i].ndev) - continue; + for (id = 0; id < common->rx_ch_num_flows; id++) { + flow = &rx_chn->flows[id]; - rxq = &common->ports[i].xdp_rxq; + for (port = 0; port < common->port_num; port++) { + if (!common->ports[port].ndev) + continue; - if (xdp_rxq_info_is_reg(rxq)) - xdp_rxq_info_unreg(rxq); - } + rxq = &common->ports[port].xdp_rxq[id]; + + if (xdp_rxq_info_is_reg(rxq)) + xdp_rxq_info_unreg(rxq); + } - if (rx_chn->page_pool) { - page_pool_destroy(rx_chn->page_pool); - rx_chn->page_pool = NULL; + if (flow->page_pool) { + page_pool_destroy(flow->page_pool); + flow->page_pool = NULL; + } } } @@ -427,31 +435,44 @@ static int am65_cpsw_create_xdp_rxqs(struct am65_cpsw_common *common) .nid = dev_to_node(common->dev), .dev = common->dev, .dma_dir = DMA_BIDIRECTIONAL, - .napi = &common->napi_rx, + /* .napi set dynamically */ }; + struct am65_cpsw_rx_flow *flow; struct xdp_rxq_info *rxq; struct page_pool *pool; - int i, ret; - - pool = page_pool_create(&pp_params); - if (IS_ERR(pool)) - return PTR_ERR(pool); + int id, port, ret; + + for (id = 0; id < common->rx_ch_num_flows; id++) { + flow = &rx_chn->flows[id]; + pp_params.napi = &flow->napi_rx; + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) { + ret = PTR_ERR(pool); + goto err; + } - rx_chn->page_pool = pool; + flow->page_pool = pool; - for (i = 0; i < common->port_num; i++) { - if (!common->ports[i].ndev) - continue; + /* using same page pool is allowed as no running rx handlers + * simultaneously for both ndevs + */ + for (port = 0; port < common->port_num; port++) { + if (!common->ports[port].ndev) + continue; - rxq = &common->ports[i].xdp_rxq; + rxq = &common->ports[port].xdp_rxq[id]; - ret = xdp_rxq_info_reg(rxq, common->ports[i].ndev, i, 0); - if (ret) - goto err; + ret = xdp_rxq_info_reg(rxq, common->ports[port].ndev, + id, flow->napi_rx.napi_id); + if (ret) + goto err; - ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); - if (ret) - goto err; + ret = xdp_rxq_info_reg_mem_model(rxq, + MEM_TYPE_PAGE_POOL, + pool); + if (ret) + goto err; + } } return 0; @@ -496,25 +517,27 @@ static enum am65_cpsw_tx_buf_type am65_cpsw_nuss_buf_type(struct am65_cpsw_tx_ch desc_idx); } -static inline void am65_cpsw_put_page(struct am65_cpsw_rx_chn *rx_chn, +static inline void am65_cpsw_put_page(struct am65_cpsw_rx_flow *flow, struct page *page, bool allow_direct, int desc_idx) { - page_pool_put_full_page(rx_chn->page_pool, page, allow_direct); - rx_chn->pages[desc_idx] = NULL; + page_pool_put_full_page(flow->page_pool, page, allow_direct); + flow->pages[desc_idx] = NULL; } static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) { - struct am65_cpsw_rx_chn *rx_chn = data; + struct am65_cpsw_rx_flow *flow = data; struct cppi5_host_desc_t *desc_rx; + struct am65_cpsw_rx_chn *rx_chn; dma_addr_t buf_dma; u32 buf_dma_len; void *page_addr; void **swdata; int desc_idx; + rx_chn = &flow->common->rx_chns; desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); page_addr = *swdata; @@ -525,7 +548,7 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) desc_idx = 
am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx, rx_chn->dsize_log2); - am65_cpsw_put_page(rx_chn, virt_to_page(page_addr), false, desc_idx); + am65_cpsw_put_page(flow, virt_to_page(page_addr), false, desc_idx); } static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, @@ -601,7 +624,8 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) struct am65_cpsw_host *host_p = am65_common_get_host(common); struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; struct am65_cpsw_tx_chn *tx_chn = common->tx_chns; - int port_idx, i, ret, tx; + int port_idx, i, ret, tx, flow_idx; + struct am65_cpsw_rx_flow *flow; u32 val, port_mask; struct page *page; @@ -669,27 +693,26 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) return ret; } - for (i = 0; i < rx_chn->descs_num; i++) { - page = page_pool_dev_alloc_pages(rx_chn->page_pool); - if (!page) { - ret = -ENOMEM; - if (i) + for (flow_idx = 0; flow_idx < common->rx_ch_num_flows; flow_idx++) { + flow = &rx_chn->flows[flow_idx]; + for (i = 0; i < AM65_CPSW_MAX_RX_DESC; i++) { + page = page_pool_dev_alloc_pages(flow->page_pool); + if (!page) { + dev_err(common->dev, "cannot allocate page in flow %d\n", + flow_idx); + ret = -ENOMEM; goto fail_rx; + } + flow->pages[i] = page; - return ret; - } - rx_chn->pages[i] = page; - - ret = am65_cpsw_nuss_rx_push(common, page); - if (ret < 0) { - dev_err(common->dev, - "cannot submit page to channel rx: %d\n", - ret); - am65_cpsw_put_page(rx_chn, page, false, i); - if (i) + ret = am65_cpsw_nuss_rx_push(common, page, flow_idx); + if (ret < 0) { + dev_err(common->dev, + "cannot submit page to rx channel flow %d, error %d\n", + flow_idx, ret); + am65_cpsw_put_page(flow, page, false, i); goto fail_rx; - - return ret; + } } } @@ -699,6 +722,14 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) goto fail_rx; } + for (i = 0; i < common->rx_ch_num_flows ; i++) { + napi_enable(&rx_chn->flows[i].napi_rx); + if (rx_chn->flows[i].irq_disabled) { + rx_chn->flows[i].irq_disabled = false; + enable_irq(rx_chn->flows[i].irq); + } + } + for (tx = 0; tx < common->tx_ch_num; tx++) { ret = k3_udma_glue_enable_tx_chn(tx_chn[tx].tx_chn); if (ret) { @@ -710,12 +741,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) napi_enable(&tx_chn[tx].napi_tx); } - napi_enable(&common->napi_rx); - if (common->rx_irq_disabled) { - common->rx_irq_disabled = false; - enable_irq(rx_chn->irq); - } - dev_dbg(common->dev, "cpsw_nuss started\n"); return 0; @@ -726,11 +751,24 @@ fail_tx: tx--; } + for (flow_idx = 0; flow_idx < common->rx_ch_num_flows; flow_idx++) { + flow = &rx_chn->flows[flow_idx]; + if (!flow->irq_disabled) { + disable_irq(flow->irq); + flow->irq_disabled = true; + } + napi_disable(&flow->napi_rx); + } + k3_udma_glue_disable_rx_chn(rx_chn->rx_chn); fail_rx: - k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, 0, rx_chn, - am65_cpsw_nuss_rx_cleanup, 0); + for (i = 0; i < common->rx_ch_num_flows; i++) + k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i], + am65_cpsw_nuss_rx_cleanup, 0); + + am65_cpsw_destroy_xdp_rxqs(common); + return ret; } @@ -779,12 +817,12 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) dev_err(common->dev, "rx teardown timeout\n"); } - napi_disable(&common->napi_rx); - hrtimer_cancel(&common->rx_hrtimer); - - for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) - k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn, - am65_cpsw_nuss_rx_cleanup, !!i); + for (i = 0; i < common->rx_ch_num_flows; i++) { + 
napi_disable(&rx_chn->flows[i].napi_rx); + hrtimer_cancel(&rx_chn->flows[i].rx_hrtimer); + k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i], + am65_cpsw_nuss_rx_cleanup, 0); + } k3_udma_glue_disable_rx_chn(rx_chn->rx_chn); @@ -793,10 +831,6 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) writel(0, common->cpsw_base + AM65_CPSW_REG_CTL); writel(0, common->cpsw_base + AM65_CPSW_REG_STAT_PORT_EN); - for (i = 0; i < rx_chn->descs_num; i++) { - if (rx_chn->pages[i]) - am65_cpsw_put_page(rx_chn, rx_chn->pages[i], false, i); - } am65_cpsw_destroy_xdp_rxqs(common); dev_dbg(common->dev, "cpsw_nuss stopped\n"); @@ -867,7 +901,7 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) goto runtime_put; } - ret = netif_set_real_num_rx_queues(ndev, AM65_CPSW_MAX_RX_QUEUES); + ret = netif_set_real_num_rx_queues(ndev, common->rx_ch_num_flows); if (ret) { dev_err(common->dev, "cannot set real number of rx queues\n"); goto runtime_put; @@ -933,7 +967,7 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); if (unlikely(!host_desc)) { ndev->stats.tx_dropped++; - return -ENOMEM; + return AM65_CPSW_XDP_CONSUMED; /* drop */ } am65_cpsw_nuss_set_buf_type(tx_chn, host_desc, buf_type); @@ -942,7 +976,7 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, pkt_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(tx_chn->dma_dev, dma_buf))) { ndev->stats.tx_dropped++; - ret = -ENOMEM; + ret = AM65_CPSW_XDP_CONSUMED; /* drop */ goto pool_free; } @@ -977,6 +1011,7 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, /* Inform BQL */ netdev_tx_completed_queue(netif_txq, 1, pkt_len); ndev->stats.tx_errors++; + ret = AM65_CPSW_XDP_CONSUMED; /* drop */ goto dma_unmap; } @@ -990,13 +1025,15 @@ pool_free: return ret; } -static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, +static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, struct am65_cpsw_port *port, struct xdp_buff *xdp, int desc_idx, int cpu, int *len) { - struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct am65_cpsw_common *common = flow->common; + struct am65_cpsw_ndev_priv *ndev_priv; struct net_device *ndev = port->ndev; + struct am65_cpsw_ndev_stats *stats; int ret = AM65_CPSW_XDP_CONSUMED; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; @@ -1004,6 +1041,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, struct bpf_prog *prog; struct page *page; u32 act; + int err; prog = READ_ONCE(port->xdp_prog); if (!prog) @@ -1013,41 +1051,49 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, /* XDP prog might have changed packet data and boundaries */ *len = xdp->data_end - xdp->data; + ndev_priv = netdev_priv(ndev); + stats = this_cpu_ptr(ndev_priv->stats); + switch (act) { case XDP_PASS: ret = AM65_CPSW_XDP_PASS; goto out; case XDP_TX: - tx_chn = &common->tx_chns[cpu % AM65_CPSW_MAX_TX_QUEUES]; + tx_chn = &common->tx_chns[cpu % AM65_CPSW_MAX_QUEUES]; netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) - break; + goto drop; __netif_tx_lock(netif_txq, cpu); - ret = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf, + err = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf, AM65_CPSW_TX_BUF_TYPE_XDP_TX); __netif_tx_unlock(netif_txq); - if (ret) - break; + if (err) + goto drop; - ndev->stats.rx_bytes += *len; - ndev->stats.rx_packets++; + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += *len; + stats->rx_packets++; + 
u64_stats_update_end(&stats->syncp); ret = AM65_CPSW_XDP_CONSUMED; goto out; case XDP_REDIRECT: if (unlikely(xdp_do_redirect(ndev, xdp, prog))) - break; + goto drop; - ndev->stats.rx_bytes += *len; - ndev->stats.rx_packets++; + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += *len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); ret = AM65_CPSW_XDP_REDIRECT; goto out; default: bpf_warn_invalid_xdp_action(ndev, prog, act); fallthrough; case XDP_ABORTED: +drop: trace_xdp_exception(ndev, prog, act); fallthrough; case XDP_DROP: @@ -1055,7 +1101,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, } page = virt_to_head_page(xdp->data); - am65_cpsw_put_page(rx_chn, page, true, desc_idx); + am65_cpsw_put_page(flow, page, true, desc_idx); out: return ret; @@ -1094,11 +1140,12 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info) } } -static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, - u32 flow_idx, int cpu) +static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, + int cpu, int *xdp_state) { - struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct am65_cpsw_rx_chn *rx_chn = &flow->common->rx_chns; u32 buf_dma_len, pkt_len, port_id = 0, csum_info; + struct am65_cpsw_common *common = flow->common; struct am65_cpsw_ndev_priv *ndev_priv; struct am65_cpsw_ndev_stats *stats; struct cppi5_host_desc_t *desc_rx; @@ -1108,12 +1155,14 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, struct am65_cpsw_port *port; int headroom, desc_idx, ret; struct net_device *ndev; + u32 flow_idx = flow->id; struct sk_buff *skb; struct xdp_buff xdp; void *page_addr; void **swdata; u32 *psdata; + *xdp_state = AM65_CPSW_XDP_PASS; ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_idx, &desc_dma); if (ret) { if (ret != -ENODATA) @@ -1161,15 +1210,13 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, } if (port->xdp_prog) { - xdp_init_buff(&xdp, AM65_CPSW_MAX_PACKET_SIZE, &port->xdp_rxq); - - xdp_prepare_buff(&xdp, page_addr, skb_headroom(skb), + xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]); + xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, pkt_len, false); - - ret = am65_cpsw_run_xdp(common, port, &xdp, desc_idx, - cpu, &pkt_len); - if (ret != AM65_CPSW_XDP_PASS) - return ret; + *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, desc_idx, + cpu, &pkt_len); + if (*xdp_state != AM65_CPSW_XDP_PASS) + goto allocate; /* Compute additional headroom to be reserved */ headroom = (xdp.data - xdp.data_hard_start) - skb_headroom(skb); @@ -1184,7 +1231,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, skb_mark_for_recycle(skb); skb->protocol = eth_type_trans(skb, ndev); am65_cpsw_nuss_rx_csum(skb, csum_info); - napi_gro_receive(&common->napi_rx, skb); + napi_gro_receive(&flow->napi_rx, skb); stats = this_cpu_ptr(ndev_priv->stats); @@ -1193,21 +1240,25 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, stats->rx_bytes += pkt_len; u64_stats_update_end(&stats->syncp); - new_page = page_pool_dev_alloc_pages(rx_chn->page_pool); - if (unlikely(!new_page)) +allocate: + new_page = page_pool_dev_alloc_pages(flow->page_pool); + if (unlikely(!new_page)) { + dev_err(dev, "page alloc failed\n"); return -ENOMEM; - rx_chn->pages[desc_idx] = new_page; + } + + flow->pages[desc_idx] = new_page; if (netif_dormant(ndev)) { - am65_cpsw_put_page(rx_chn, new_page, true, desc_idx); + am65_cpsw_put_page(flow, new_page, true, desc_idx); ndev->stats.rx_dropped++; 
return 0; } requeue: - ret = am65_cpsw_nuss_rx_push(common, new_page); + ret = am65_cpsw_nuss_rx_push(common, new_page, flow_idx); if (WARN_ON(ret < 0)) { - am65_cpsw_put_page(rx_chn, new_page, true, desc_idx); + am65_cpsw_put_page(flow, new_page, true, desc_idx); ndev->stats.rx_errors++; ndev->stats.rx_dropped++; } @@ -1217,54 +1268,48 @@ requeue: static enum hrtimer_restart am65_cpsw_nuss_rx_timer_callback(struct hrtimer *timer) { - struct am65_cpsw_common *common = - container_of(timer, struct am65_cpsw_common, rx_hrtimer); + struct am65_cpsw_rx_flow *flow = container_of(timer, + struct am65_cpsw_rx_flow, + rx_hrtimer); - enable_irq(common->rx_chns.irq); + enable_irq(flow->irq); return HRTIMER_NORESTART; } static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) { - struct am65_cpsw_common *common = am65_cpsw_napi_to_common(napi_rx); - int flow = AM65_CPSW_MAX_RX_FLOWS; + struct am65_cpsw_rx_flow *flow = am65_cpsw_napi_to_rx_flow(napi_rx); + struct am65_cpsw_common *common = flow->common; int cpu = smp_processor_id(); - bool xdp_redirect = false; + int xdp_state_or = 0; int cur_budget, ret; + int xdp_state; int num_rx = 0; - /* process every flow */ - while (flow--) { - cur_budget = budget - num_rx; - - while (cur_budget--) { - ret = am65_cpsw_nuss_rx_packets(common, flow, cpu); - if (ret) { - if (ret == AM65_CPSW_XDP_REDIRECT) - xdp_redirect = true; - break; - } - num_rx++; - } - - if (num_rx >= budget) + /* process only this flow */ + cur_budget = budget; + while (cur_budget--) { + ret = am65_cpsw_nuss_rx_packets(flow, cpu, &xdp_state); + xdp_state_or |= xdp_state; + if (ret) break; + num_rx++; } - if (xdp_redirect) + if (xdp_state_or & AM65_CPSW_XDP_REDIRECT) xdp_do_flush(); dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { - if (common->rx_irq_disabled) { - common->rx_irq_disabled = false; - if (unlikely(common->rx_pace_timeout)) { - hrtimer_start(&common->rx_hrtimer, - ns_to_ktime(common->rx_pace_timeout), + if (flow->irq_disabled) { + flow->irq_disabled = false; + if (unlikely(flow->rx_pace_timeout)) { + hrtimer_start(&flow->rx_hrtimer, + ns_to_ktime(flow->rx_pace_timeout), HRTIMER_MODE_REL_PINNED); } else { - enable_irq(common->rx_chns.irq); + enable_irq(flow->irq); } } } @@ -1512,11 +1557,11 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id) { - struct am65_cpsw_common *common = dev_id; + struct am65_cpsw_rx_flow *flow = dev_id; - common->rx_irq_disabled = true; + flow->irq_disabled = true; disable_irq_nosync(irq); - napi_schedule(&common->napi_rx); + napi_schedule(&flow->napi_rx); return IRQ_HANDLED; } @@ -1918,12 +1963,13 @@ static int am65_cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) static int am65_cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, u32 flags) { + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; int cpu = smp_processor_id(); int i, nxmit = 0; - tx_chn = &am65_ndev_to_common(ndev)->tx_chns[cpu % AM65_CPSW_MAX_TX_QUEUES]; + tx_chn = &common->tx_chns[cpu % common->tx_ch_num]; netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); __netif_tx_lock(netif_txq, cpu); @@ -2160,7 +2206,7 @@ static void am65_cpsw_nuss_free_tx_chns(void *data) } } -void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common) +static void am65_cpsw_nuss_remove_tx_chns(struct 
am65_cpsw_common *common) { struct device *dev = common->dev; int i; @@ -2175,15 +2221,9 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common) devm_free_irq(dev, tx_chn->irq, tx_chn); netif_napi_del(&tx_chn->napi_tx); - - if (!IS_ERR_OR_NULL(tx_chn->desc_pool)) - k3_cppi_desc_pool_destroy(tx_chn->desc_pool); - - if (!IS_ERR_OR_NULL(tx_chn->tx_chn)) - k3_udma_glue_release_tx_chn(tx_chn->tx_chn); - - memset(tx_chn, 0, sizeof(*tx_chn)); } + + am65_cpsw_nuss_free_tx_chns(common); } static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common) @@ -2315,19 +2355,22 @@ static void am65_cpsw_nuss_free_rx_chns(void *data) k3_udma_glue_release_rx_chn(rx_chn->rx_chn); } -static void am65_cpsw_nuss_remove_rx_chns(void *data) +static void am65_cpsw_nuss_remove_rx_chns(struct am65_cpsw_common *common) { - struct am65_cpsw_common *common = data; struct device *dev = common->dev; struct am65_cpsw_rx_chn *rx_chn; + struct am65_cpsw_rx_flow *flows; + int i; rx_chn = &common->rx_chns; + flows = rx_chn->flows; devm_remove_action(dev, am65_cpsw_nuss_free_rx_chns, common); - if (!(rx_chn->irq < 0)) - devm_free_irq(dev, rx_chn->irq, common); - - netif_napi_del(&common->napi_rx); + for (i = 0; i < common->rx_ch_num_flows; i++) { + if (!(flows[i].irq < 0)) + devm_free_irq(dev, flows[i].irq, &flows[i]); + netif_napi_del(&flows[i].napi_rx); + } am65_cpsw_nuss_free_rx_chns(common); @@ -2340,6 +2383,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) struct k3_udma_glue_rx_channel_cfg rx_cfg = { 0 }; u32 max_desc_num = AM65_CPSW_MAX_RX_DESC; struct device *dev = common->dev; + struct am65_cpsw_rx_flow *flow; u32 hdesc_size, hdesc_size_out; u32 fdqring_id; int i, ret = 0; @@ -2348,12 +2392,21 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) AM65_CPSW_NAV_SW_DATA_SIZE); rx_cfg.swdata_size = AM65_CPSW_NAV_SW_DATA_SIZE; - rx_cfg.flow_id_num = AM65_CPSW_MAX_RX_FLOWS; + rx_cfg.flow_id_num = common->rx_ch_num_flows; rx_cfg.flow_id_base = common->rx_flow_id_base; /* init all flows */ rx_chn->dev = dev; - rx_chn->descs_num = max_desc_num; + rx_chn->descs_num = max_desc_num * rx_cfg.flow_id_num; + + for (i = 0; i < common->rx_ch_num_flows; i++) { + flow = &rx_chn->flows[i]; + flow->page_pool = NULL; + flow->pages = devm_kcalloc(dev, AM65_CPSW_MAX_RX_DESC, + sizeof(*flow->pages), GFP_KERNEL); + if (!flow->pages) + return -ENOMEM; + } rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, "rx", &rx_cfg); if (IS_ERR(rx_chn->rx_chn)) { @@ -2376,13 +2429,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) rx_chn->dsize_log2 = __fls(hdesc_size_out); WARN_ON(hdesc_size_out != (1 << rx_chn->dsize_log2)); - rx_chn->page_pool = NULL; - - rx_chn->pages = devm_kcalloc(dev, rx_chn->descs_num, - sizeof(*rx_chn->pages), GFP_KERNEL); - if (!rx_chn->pages) - return -ENOMEM; - common->rx_flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); dev_info(dev, "set new flow-id-base %u\n", common->rx_flow_id_base); @@ -2406,6 +2452,10 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) K3_UDMA_GLUE_SRC_TAG_LO_USE_REMOTE_SRC_TAG, }; + flow = &rx_chn->flows[i]; + flow->id = i; + flow->common = common; + rx_flow_cfg.ring_rxfdq0_id = fdqring_id; rx_flow_cfg.rx_cfg.size = max_desc_num; rx_flow_cfg.rxfdq_cfg.size = max_desc_num; @@ -2422,30 +2472,37 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn, i); - rx_chn->irq = 
k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); - - if (rx_chn->irq < 0) { + flow->irq = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); + if (flow->irq <= 0) { dev_err(dev, "Failed to get rx dma irq %d\n", - rx_chn->irq); - ret = rx_chn->irq; + flow->irq); + ret = flow->irq; goto err; } - } - netif_napi_add(common->dma_ndev, &common->napi_rx, - am65_cpsw_nuss_rx_poll); - hrtimer_init(&common->rx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); - common->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback; - - ret = devm_request_irq(dev, rx_chn->irq, - am65_cpsw_nuss_rx_irq, - IRQF_TRIGGER_HIGH, dev_name(dev), common); - if (ret) { - dev_err(dev, "failure requesting rx irq %u, %d\n", - rx_chn->irq, ret); - goto err; + snprintf(flow->name, + sizeof(flow->name), "%s-rx%d", + dev_name(dev), i); + netif_napi_add(common->dma_ndev, &flow->napi_rx, + am65_cpsw_nuss_rx_poll); + hrtimer_init(&flow->rx_hrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + flow->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback; + + ret = devm_request_irq(dev, flow->irq, + am65_cpsw_nuss_rx_irq, + IRQF_TRIGGER_HIGH, + flow->name, flow); + if (ret) { + dev_err(dev, "failure requesting rx %d irq %u, %d\n", + i, flow->irq, ret); + goto err; + } } + /* setup classifier to route priorities to flows */ + cpsw_ale_classifier_setup_default(common->ale, common->rx_ch_num_flows); + err: i = devm_add_action(dev, am65_cpsw_nuss_free_rx_chns, common); if (i) { @@ -2689,8 +2746,8 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) /* alloc netdev */ port->ndev = devm_alloc_etherdev_mqs(common->dev, sizeof(struct am65_cpsw_ndev_priv), - AM65_CPSW_MAX_TX_QUEUES, - AM65_CPSW_MAX_RX_QUEUES); + AM65_CPSW_MAX_QUEUES, + AM65_CPSW_MAX_QUEUES); if (!port->ndev) { dev_err(dev, "error allocating slave net_device %u\n", port->port_id); @@ -2761,7 +2818,7 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) } phylink = phylink_create(&port->slave.phylink_config, - of_node_to_fwnode(port->slave.port_np), + of_fwnode_handle(port->slave.port_np), port->slave.phy_if, &am65_cpsw_phylink_mac_ops); if (IS_ERR(phylink)) @@ -3287,9 +3344,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn); } - for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) - k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, - am65_cpsw_nuss_rx_cleanup, !!i); + for (i = 0; i < common->rx_ch_num_flows; i++) + k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, + &rx_chan->flows[i], + am65_cpsw_nuss_rx_cleanup, 0); k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); @@ -3330,12 +3388,21 @@ err_cleanup_ndev: return ret; } -int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx) +int am65_cpsw_nuss_update_tx_rx_chns(struct am65_cpsw_common *common, + int num_tx, int num_rx) { int ret; + am65_cpsw_nuss_remove_tx_chns(common); + am65_cpsw_nuss_remove_rx_chns(common); + common->tx_ch_num = num_tx; + common->rx_ch_num_flows = num_rx; ret = am65_cpsw_nuss_init_tx_chns(common); + if (ret) + return ret; + + ret = am65_cpsw_nuss_init_rx_chns(common); return ret; } @@ -3465,6 +3532,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) common->rx_flow_id_base = -1; init_completion(&common->tdown_complete); common->tx_ch_num = AM65_CPSW_DEFAULT_TX_CHNS; + common->rx_ch_num_flows = AM65_CPSW_DEFAULT_RX_CHN_FLOWS; common->pf_p0_rx_ptype_rrobin = false; common->default_vlan = 1; @@ -3656,8 +3724,10 @@ static int am65_cpsw_nuss_resume(struct 
device *dev) return ret; /* If RX IRQ was disabled before suspend, keep it disabled */ - if (common->rx_irq_disabled) - disable_irq(common->rx_chns.irq); + for (i = 0; i < common->rx_ch_num_flows; i++) { + if (common->rx_chns.flows[i].irq_disabled) + disable_irq(common->rx_chns.flows[i].irq); + } am65_cpts_resume(common->cpts); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index e2ce2be320bd..dc8d544230dc 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -21,9 +21,7 @@ struct am65_cpts; #define HOST_PORT_NUM 0 -#define AM65_CPSW_MAX_TX_QUEUES 8 -#define AM65_CPSW_MAX_RX_QUEUES 1 -#define AM65_CPSW_MAX_RX_FLOWS 1 +#define AM65_CPSW_MAX_QUEUES 8 /* both TX & RX */ #define AM65_CPSW_PORT_VLAN_REG_OFFSET 0x014 @@ -58,7 +56,7 @@ struct am65_cpsw_port { struct am65_cpsw_qos qos; struct devlink_port devlink_port; struct bpf_prog *xdp_prog; - struct xdp_rxq_info xdp_rxq; + struct xdp_rxq_info xdp_rxq[AM65_CPSW_MAX_QUEUES]; /* Only for suspend resume context */ u32 vid_context; }; @@ -94,16 +92,27 @@ struct am65_cpsw_tx_chn { u32 rate_mbps; }; +struct am65_cpsw_rx_flow { + u32 id; + struct napi_struct napi_rx; + struct am65_cpsw_common *common; + int irq; + bool irq_disabled; + struct hrtimer rx_hrtimer; + unsigned long rx_pace_timeout; + struct page_pool *page_pool; + struct page **pages; + char name[32]; +}; + struct am65_cpsw_rx_chn { struct device *dev; struct device *dma_dev; struct k3_cppi_desc_pool *desc_pool; struct k3_udma_glue_rx_channel *rx_chn; - struct page_pool *page_pool; - struct page **pages; u32 descs_num; unsigned char dsize_log2; - int irq; + struct am65_cpsw_rx_flow flows[AM65_CPSW_MAX_QUEUES]; }; #define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0) @@ -145,16 +154,12 @@ struct am65_cpsw_common { u32 tx_ch_rate_msk; u32 rx_flow_id_base; - struct am65_cpsw_tx_chn tx_chns[AM65_CPSW_MAX_TX_QUEUES]; + struct am65_cpsw_tx_chn tx_chns[AM65_CPSW_MAX_QUEUES]; struct completion tdown_complete; atomic_t tdown_cnt; + int rx_ch_num_flows; struct am65_cpsw_rx_chn rx_chns; - struct napi_struct napi_rx; - - bool rx_irq_disabled; - struct hrtimer rx_hrtimer; - unsigned long rx_pace_timeout; u32 nuss_ver; u32 cpsw_ver; @@ -203,8 +208,8 @@ struct am65_cpsw_ndev_priv { #define am65_common_get_host(common) (&(common)->host) #define am65_common_get_port(common, id) (&(common)->ports[(id) - 1]) -#define am65_cpsw_napi_to_common(pnapi) \ - container_of(pnapi, struct am65_cpsw_common, napi_rx) +#define am65_cpsw_napi_to_rx_flow(pnapi) \ + container_of(pnapi, struct am65_cpsw_rx_flow, napi_rx) #define am65_cpsw_napi_to_tx_chn(pnapi) \ container_of(pnapi, struct am65_cpsw_tx_chn, napi_tx) @@ -215,8 +220,8 @@ struct am65_cpsw_ndev_priv { extern const struct ethtool_ops am65_cpsw_ethtool_ops_slave; void am65_cpsw_nuss_set_p0_ptype(struct am65_cpsw_common *common); -void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common); -int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx); +int am65_cpsw_nuss_update_tx_rx_chns(struct am65_cpsw_common *common, + int num_tx, int num_rx); bool am65_cpsw_port_dev_check(const struct net_device *dev); diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 64bf22cd860c..0d5d8917c70b 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/regmap.h> 
#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/err.h> @@ -45,6 +46,24 @@ #define ALE_UNKNOWNVLAN_FORCE_UNTAG_EGRESS 0x9C #define ALE_VLAN_MASK_MUX(reg) (0xc0 + (0x4 * (reg))) +#define ALE_POLICER_PORT_OUI 0x100 +#define ALE_POLICER_DA_SA 0x104 +#define ALE_POLICER_VLAN 0x108 +#define ALE_POLICER_ETHERTYPE_IPSA 0x10c +#define ALE_POLICER_IPDA 0x110 +#define ALE_POLICER_PIR 0x118 +#define ALE_POLICER_CIR 0x11c +#define ALE_POLICER_TBL_CTL 0x120 +#define ALE_POLICER_CTL 0x124 +#define ALE_POLICER_TEST_CTL 0x128 +#define ALE_POLICER_HIT_STATUS 0x12c +#define ALE_THREAD_DEF 0x134 +#define ALE_THREAD_CTL 0x138 +#define ALE_THREAD_VAL 0x13c + +#define ALE_POLICER_TBL_WRITE_ENABLE BIT(31) +#define ALE_POLICER_TBL_INDEX_MASK GENMASK(4, 0) + #define AM65_CPSW_ALE_THREAD_DEF_REG 0x134 /* ALE_AGING_TIMER */ @@ -76,7 +95,7 @@ enum { * @dev_id: ALE version/SoC id * @features: features supported by ALE * @tbl_entries: number of ALE entries - * @major_ver_mask: mask of ALE Major Version Value in ALE_IDVER reg. + * @reg_fields: pointer to array of register field configuration * @nu_switch_ale: NU Switch ALE * @vlan_entry_tbl: ALE vlan entry fields description tbl */ @@ -84,7 +103,7 @@ struct cpsw_ale_dev_id { const char *dev_id; u32 features; u32 tbl_entries; - u32 major_ver_mask; + const struct reg_field *reg_fields; bool nu_switch_ale; const struct ale_entry_fld *vlan_entry_tbl; }; @@ -102,7 +121,7 @@ struct cpsw_ale_dev_id { #define ALE_UCAST_TOUCHED 3 #define ALE_TABLE_SIZE_MULTIPLIER 1024 -#define ALE_STATUS_SIZE_MASK 0x1f +#define ALE_POLICER_SIZE_MULTIPLIER 8 static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) { @@ -1292,25 +1311,108 @@ void cpsw_ale_stop(struct cpsw_ale *ale) cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0); } +static const struct reg_field ale_fields_cpsw[] = { + /* CPSW_ALE_IDVER_REG */ + [MINOR_VER] = REG_FIELD(ALE_IDVER, 0, 7), + [MAJOR_VER] = REG_FIELD(ALE_IDVER, 8, 15), +}; + +static const struct reg_field ale_fields_cpsw_nu[] = { + /* CPSW_ALE_IDVER_REG */ + [MINOR_VER] = REG_FIELD(ALE_IDVER, 0, 7), + [MAJOR_VER] = REG_FIELD(ALE_IDVER, 8, 10), + /* CPSW_ALE_STATUS_REG */ + [ALE_ENTRIES] = REG_FIELD(ALE_STATUS, 0, 7), + [ALE_POLICERS] = REG_FIELD(ALE_STATUS, 8, 15), + /* CPSW_ALE_POLICER_PORT_OUI_REG */ + [POL_PORT_MEN] = REG_FIELD(ALE_POLICER_PORT_OUI, 31, 31), + [POL_TRUNK_ID] = REG_FIELD(ALE_POLICER_PORT_OUI, 30, 30), + [POL_PORT_NUM] = REG_FIELD(ALE_POLICER_PORT_OUI, 25, 25), + [POL_PRI_MEN] = REG_FIELD(ALE_POLICER_PORT_OUI, 19, 19), + [POL_PRI_VAL] = REG_FIELD(ALE_POLICER_PORT_OUI, 16, 18), + [POL_OUI_MEN] = REG_FIELD(ALE_POLICER_PORT_OUI, 15, 15), + [POL_OUI_INDEX] = REG_FIELD(ALE_POLICER_PORT_OUI, 0, 5), + + /* CPSW_ALE_POLICER_DA_SA_REG */ + [POL_DST_MEN] = REG_FIELD(ALE_POLICER_DA_SA, 31, 31), + [POL_DST_INDEX] = REG_FIELD(ALE_POLICER_DA_SA, 16, 21), + [POL_SRC_MEN] = REG_FIELD(ALE_POLICER_DA_SA, 15, 15), + [POL_SRC_INDEX] = REG_FIELD(ALE_POLICER_DA_SA, 0, 5), + + /* CPSW_ALE_POLICER_VLAN_REG */ + [POL_OVLAN_MEN] = REG_FIELD(ALE_POLICER_VLAN, 31, 31), + [POL_OVLAN_INDEX] = REG_FIELD(ALE_POLICER_VLAN, 16, 21), + [POL_IVLAN_MEN] = REG_FIELD(ALE_POLICER_VLAN, 15, 15), + [POL_IVLAN_INDEX] = REG_FIELD(ALE_POLICER_VLAN, 0, 5), + + /* CPSW_ALE_POLICER_ETHERTYPE_IPSA_REG */ + [POL_ETHERTYPE_MEN] = REG_FIELD(ALE_POLICER_ETHERTYPE_IPSA, 31, 31), + [POL_ETHERTYPE_INDEX] = REG_FIELD(ALE_POLICER_ETHERTYPE_IPSA, 16, 21), + [POL_IPSRC_MEN] = REG_FIELD(ALE_POLICER_ETHERTYPE_IPSA, 15, 15), + [POL_IPSRC_INDEX] = 
REG_FIELD(ALE_POLICER_ETHERTYPE_IPSA, 0, 5), + + /* CPSW_ALE_POLICER_IPDA_REG */ + [POL_IPDST_MEN] = REG_FIELD(ALE_POLICER_IPDA, 31, 31), + [POL_IPDST_INDEX] = REG_FIELD(ALE_POLICER_IPDA, 16, 21), + + /* CPSW_ALE_POLICER_TBL_CTL_REG */ + /** + * REG_FIELDS not defined for this as fields cannot be correctly + * used independently + */ + + /* CPSW_ALE_POLICER_CTL_REG */ + [POL_EN] = REG_FIELD(ALE_POLICER_CTL, 31, 31), + [POL_RED_DROP_EN] = REG_FIELD(ALE_POLICER_CTL, 29, 29), + [POL_YELLOW_DROP_EN] = REG_FIELD(ALE_POLICER_CTL, 28, 28), + [POL_YELLOW_THRESH] = REG_FIELD(ALE_POLICER_CTL, 24, 26), + [POL_POL_MATCH_MODE] = REG_FIELD(ALE_POLICER_CTL, 22, 23), + [POL_PRIORITY_THREAD_EN] = REG_FIELD(ALE_POLICER_CTL, 21, 21), + [POL_MAC_ONLY_DEF_DIS] = REG_FIELD(ALE_POLICER_CTL, 20, 20), + + /* CPSW_ALE_POLICER_TEST_CTL_REG */ + [POL_TEST_CLR] = REG_FIELD(ALE_POLICER_TEST_CTL, 31, 31), + [POL_TEST_CLR_RED] = REG_FIELD(ALE_POLICER_TEST_CTL, 30, 30), + [POL_TEST_CLR_YELLOW] = REG_FIELD(ALE_POLICER_TEST_CTL, 29, 29), + [POL_TEST_CLR_SELECTED] = REG_FIELD(ALE_POLICER_TEST_CTL, 28, 28), + [POL_TEST_ENTRY] = REG_FIELD(ALE_POLICER_TEST_CTL, 0, 4), + + /* CPSW_ALE_POLICER_HIT_STATUS_REG */ + [POL_STATUS_HIT] = REG_FIELD(ALE_POLICER_HIT_STATUS, 31, 31), + [POL_STATUS_HIT_RED] = REG_FIELD(ALE_POLICER_HIT_STATUS, 30, 30), + [POL_STATUS_HIT_YELLOW] = REG_FIELD(ALE_POLICER_HIT_STATUS, 29, 29), + + /* CPSW_ALE_THREAD_DEF_REG */ + [ALE_DEFAULT_THREAD_EN] = REG_FIELD(ALE_THREAD_DEF, 15, 15), + [ALE_DEFAULT_THREAD_VAL] = REG_FIELD(ALE_THREAD_DEF, 0, 5), + + /* CPSW_ALE_THREAD_CTL_REG */ + [ALE_THREAD_CLASS_INDEX] = REG_FIELD(ALE_THREAD_CTL, 0, 4), + + /* CPSW_ALE_THREAD_VAL_REG */ + [ALE_THREAD_ENABLE] = REG_FIELD(ALE_THREAD_VAL, 15, 15), + [ALE_THREAD_VALUE] = REG_FIELD(ALE_THREAD_VAL, 0, 5), +}; + static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { { /* am3/4/5, dra7. 
dm814x, 66ak2hk-gbe */ .dev_id = "cpsw", .tbl_entries = 1024, - .major_ver_mask = 0xff, + .reg_fields = ale_fields_cpsw, .vlan_entry_tbl = vlan_entry_cpsw, }, { /* 66ak2h_xgbe */ .dev_id = "66ak2h-xgbe", .tbl_entries = 2048, - .major_ver_mask = 0xff, + .reg_fields = ale_fields_cpsw, .vlan_entry_tbl = vlan_entry_cpsw, }, { .dev_id = "66ak2el", .features = CPSW_ALE_F_STATUS_REG, - .major_ver_mask = 0x7, + .reg_fields = ale_fields_cpsw_nu, .nu_switch_ale = true, .vlan_entry_tbl = vlan_entry_nu, }, @@ -1318,7 +1420,7 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .dev_id = "66ak2g", .features = CPSW_ALE_F_STATUS_REG, .tbl_entries = 64, - .major_ver_mask = 0x7, + .reg_fields = ale_fields_cpsw_nu, .nu_switch_ale = true, .vlan_entry_tbl = vlan_entry_nu, }, @@ -1326,20 +1428,20 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .dev_id = "am65x-cpsw2g", .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, .tbl_entries = 64, - .major_ver_mask = 0x7, + .reg_fields = ale_fields_cpsw_nu, .nu_switch_ale = true, .vlan_entry_tbl = vlan_entry_nu, }, { .dev_id = "j721e-cpswxg", .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, - .major_ver_mask = 0x7, + .reg_fields = ale_fields_cpsw_nu, .vlan_entry_tbl = vlan_entry_k3_cpswxg, }, { .dev_id = "am64-cpswxg", .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, - .major_ver_mask = 0x7, + .reg_fields = ale_fields_cpsw_nu, .vlan_entry_tbl = vlan_entry_k3_cpswxg, .tbl_entries = 512, }, @@ -1361,47 +1463,80 @@ cpsw_ale_dev_id *cpsw_ale_match_id(const struct cpsw_ale_dev_id *id, return NULL; } +static const struct regmap_config ale_regmap_cfg = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .name = "cpsw-ale", +}; + +static int cpsw_ale_regfield_init(struct cpsw_ale *ale) +{ + const struct reg_field *reg_fields = ale->params.reg_fields; + struct device *dev = ale->params.dev; + struct regmap *regmap = ale->regmap; + int i; + + for (i = 0; i < ALE_FIELDS_MAX; i++) { + ale->fields[i] = devm_regmap_field_alloc(dev, regmap, + reg_fields[i]); + if (IS_ERR(ale->fields[i])) { + dev_err(dev, "Unable to allocate regmap field %d\n", i); + return PTR_ERR(ale->fields[i]); + } + } + + return 0; +} + struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) { + u32 ale_entries, rev_major, rev_minor, policers; const struct cpsw_ale_dev_id *ale_dev_id; struct cpsw_ale *ale; - u32 rev, ale_entries; + int ret; ale_dev_id = cpsw_ale_match_id(cpsw_ale_id_match, params->dev_id); if (!ale_dev_id) return ERR_PTR(-EINVAL); params->ale_entries = ale_dev_id->tbl_entries; - params->major_ver_mask = ale_dev_id->major_ver_mask; params->nu_switch_ale = ale_dev_id->nu_switch_ale; + params->reg_fields = ale_dev_id->reg_fields; ale = devm_kzalloc(params->dev, sizeof(*ale), GFP_KERNEL); if (!ale) return ERR_PTR(-ENOMEM); + ale->regmap = devm_regmap_init_mmio(params->dev, params->ale_regs, + &ale_regmap_cfg); + if (IS_ERR(ale->regmap)) { + dev_err(params->dev, "Couldn't create CPSW ALE regmap\n"); + return ERR_PTR(-ENOMEM); + } + + ale->params = *params; + ret = cpsw_ale_regfield_init(ale); + if (ret) + return ERR_PTR(ret); ale->p0_untag_vid_mask = devm_bitmap_zalloc(params->dev, VLAN_N_VID, GFP_KERNEL); if (!ale->p0_untag_vid_mask) return ERR_PTR(-ENOMEM); - ale->params = *params; ale->ageout = ale->params.ale_ageout * HZ; ale->features = ale_dev_id->features; ale->vlan_entry_tbl = ale_dev_id->vlan_entry_tbl; - rev = readl_relaxed(ale->params.ale_regs + ALE_IDVER); - ale->version = - (ALE_VERSION_MAJOR(rev, 
ale->params.major_ver_mask) << 8) | - ALE_VERSION_MINOR(rev); + regmap_field_read(ale->fields[MINOR_VER], &rev_minor); + regmap_field_read(ale->fields[MAJOR_VER], &rev_major); + ale->version = rev_major << 8 | rev_minor; dev_info(ale->params.dev, "initialized cpsw ale version %d.%d\n", - ALE_VERSION_MAJOR(rev, ale->params.major_ver_mask), - ALE_VERSION_MINOR(rev)); + rev_major, rev_minor); if (ale->features & CPSW_ALE_F_STATUS_REG && !ale->params.ale_entries) { - ale_entries = - readl_relaxed(ale->params.ale_regs + ALE_STATUS) & - ALE_STATUS_SIZE_MASK; + regmap_field_read(ale->fields[ALE_ENTRIES], &ale_entries); /* ALE available on newer NetCP switches has introduced * a register, ALE_STATUS, to indicate the size of ALE * table which shows the size as a multiple of 1024 entries. @@ -1415,8 +1550,20 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) ale_entries *= ALE_TABLE_SIZE_MULTIPLIER; ale->params.ale_entries = ale_entries; } + + if (ale->features & CPSW_ALE_F_STATUS_REG && + !ale->params.num_policers) { + regmap_field_read(ale->fields[ALE_POLICERS], &policers); + if (!policers) + return ERR_PTR(-EINVAL); + + policers *= ALE_POLICER_SIZE_MULTIPLIER; + ale->params.num_policers = policers; + } + dev_info(ale->params.dev, - "ALE Table size %ld\n", ale->params.ale_entries); + "ALE Table size %ld, Policers %ld\n", ale->params.ale_entries, + ale->params.num_policers); /* set default bits for existing h/w */ ale->port_mask_bits = ale->params.ale_ports; @@ -1480,3 +1627,97 @@ u32 cpsw_ale_get_num_entries(struct cpsw_ale *ale) { return ale ? ale->params.ale_entries : 0; } + +/* Reads the specified policer index into ALE POLICER registers */ +static void cpsw_ale_policer_read_idx(struct cpsw_ale *ale, u32 idx) +{ + idx &= ALE_POLICER_TBL_INDEX_MASK; + writel_relaxed(idx, ale->params.ale_regs + ALE_POLICER_TBL_CTL); +} + +/* Writes the ALE POLICER registers into the specified policer index */ +static void cpsw_ale_policer_write_idx(struct cpsw_ale *ale, u32 idx) +{ + idx &= ALE_POLICER_TBL_INDEX_MASK; + idx |= ALE_POLICER_TBL_WRITE_ENABLE; + writel_relaxed(idx, ale->params.ale_regs + ALE_POLICER_TBL_CTL); +} + +/* enables/disables the custom thread value for the specified policer index */ +static void cpsw_ale_policer_thread_idx_enable(struct cpsw_ale *ale, u32 idx, + u32 thread_id, bool enable) +{ + regmap_field_write(ale->fields[ALE_THREAD_CLASS_INDEX], idx); + regmap_field_write(ale->fields[ALE_THREAD_VALUE], thread_id); + regmap_field_write(ale->fields[ALE_THREAD_ENABLE], enable ? 
1 : 0); +} + +/* Disable all policer entries and thread mappings */ +static void cpsw_ale_policer_reset(struct cpsw_ale *ale) +{ + int i; + + for (i = 0; i < ale->params.num_policers ; i++) { + cpsw_ale_policer_read_idx(ale, i); + regmap_field_write(ale->fields[POL_PORT_MEN], 0); + regmap_field_write(ale->fields[POL_PRI_MEN], 0); + regmap_field_write(ale->fields[POL_OUI_MEN], 0); + regmap_field_write(ale->fields[POL_DST_MEN], 0); + regmap_field_write(ale->fields[POL_SRC_MEN], 0); + regmap_field_write(ale->fields[POL_OVLAN_MEN], 0); + regmap_field_write(ale->fields[POL_IVLAN_MEN], 0); + regmap_field_write(ale->fields[POL_ETHERTYPE_MEN], 0); + regmap_field_write(ale->fields[POL_IPSRC_MEN], 0); + regmap_field_write(ale->fields[POL_IPDST_MEN], 0); + regmap_field_write(ale->fields[POL_EN], 0); + regmap_field_write(ale->fields[POL_RED_DROP_EN], 0); + regmap_field_write(ale->fields[POL_YELLOW_DROP_EN], 0); + regmap_field_write(ale->fields[POL_PRIORITY_THREAD_EN], 0); + + cpsw_ale_policer_thread_idx_enable(ale, i, 0, 0); + } +} + +/* Default classifier is to map 8 user priorities to N receive channels */ +void cpsw_ale_classifier_setup_default(struct cpsw_ale *ale, int num_rx_ch) +{ + int pri, idx; + /* IEEE802.1D-2004, Standard for Local and metropolitan area networks + * Table G-2 - Traffic type acronyms + * Table G-3 - Defining traffic types + * User priority values 1 and 2 effectively communicate a lower + * priority than 0. In the below table 0 is assigned to higher priority + * thread than 1 and 2 wherever possible. + * The below table maps which thread the user priority needs to be + * sent to for a given number of threads (RX channels). Upper threads + * have higher priority. + * e.g. if number of threads is 8 then user priority 0 will map to + * pri_thread_map[8-1][0] i.e. thread 2 + */ + int pri_thread_map[8][8] = { { 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 1, 1, 1, 1, }, + { 0, 0, 0, 0, 1, 1, 2, 2, }, + { 1, 0, 0, 1, 2, 2, 3, 3, }, + { 1, 0, 0, 1, 2, 3, 4, 4, }, + { 1, 0, 0, 2, 3, 4, 5, 5, }, + { 1, 0, 0, 2, 3, 4, 5, 6, }, + { 2, 0, 1, 3, 4, 5, 6, 7, } }; + + cpsw_ale_policer_reset(ale); + + /* use first 8 classifiers to map 8 (DSCP/PCP) priorities to channels */ + for (pri = 0; pri < 8; pri++) { + idx = pri; + + /* Classifier 'idx' match on priority 'pri' */ + cpsw_ale_policer_read_idx(ale, idx); + regmap_field_write(ale->fields[POL_PRI_VAL], pri); + regmap_field_write(ale->fields[POL_PRI_MEN], 1); + cpsw_ale_policer_write_idx(ale, idx); + + /* Map Classifier 'idx' to thread provided by the map */ + cpsw_ale_policer_thread_idx_enable(ale, idx, + pri_thread_map[num_rx_ch - 1][pri], + 1); + } +} diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 6779ee111d57..1e4e9a3dd234 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -8,11 +8,14 @@ #ifndef __TI_CPSW_ALE_H__ #define __TI_CPSW_ALE_H__ +struct reg_fields; + struct cpsw_ale_params { struct device *dev; void __iomem *ale_regs; unsigned long ale_ageout; /* in secs */ unsigned long ale_entries; + unsigned long num_policers; unsigned long ale_ports; /* NU Switch has specific handling as number of bits in ALE entries * are different than other versions of ALE. Also there are specific @@ -20,19 +23,69 @@ struct cpsw_ale_params { * to identify this hardware. */ bool nu_switch_ale; - /* mask bit used in NU Switch ALE is 3 bits instead of 8 bits. So - * pass it from caller. 
- */ - u32 major_ver_mask; + const struct reg_field *reg_fields; const char *dev_id; unsigned long bus_freq; }; struct ale_entry_fld; +struct regmap; + +enum ale_fields { + MINOR_VER, + MAJOR_VER, + ALE_ENTRIES, + ALE_POLICERS, + POL_PORT_MEN, + POL_TRUNK_ID, + POL_PORT_NUM, + POL_PRI_MEN, + POL_PRI_VAL, + POL_OUI_MEN, + POL_OUI_INDEX, + POL_DST_MEN, + POL_DST_INDEX, + POL_SRC_MEN, + POL_SRC_INDEX, + POL_OVLAN_MEN, + POL_OVLAN_INDEX, + POL_IVLAN_MEN, + POL_IVLAN_INDEX, + POL_ETHERTYPE_MEN, + POL_ETHERTYPE_INDEX, + POL_IPSRC_MEN, + POL_IPSRC_INDEX, + POL_IPDST_MEN, + POL_IPDST_INDEX, + POL_EN, + POL_RED_DROP_EN, + POL_YELLOW_DROP_EN, + POL_YELLOW_THRESH, + POL_POL_MATCH_MODE, + POL_PRIORITY_THREAD_EN, + POL_MAC_ONLY_DEF_DIS, + POL_TEST_CLR, + POL_TEST_CLR_RED, + POL_TEST_CLR_YELLOW, + POL_TEST_CLR_SELECTED, + POL_TEST_ENTRY, + POL_STATUS_HIT, + POL_STATUS_HIT_RED, + POL_STATUS_HIT_YELLOW, + ALE_DEFAULT_THREAD_EN, + ALE_DEFAULT_THREAD_VAL, + ALE_THREAD_CLASS_INDEX, + ALE_THREAD_ENABLE, + ALE_THREAD_VALUE, + /* terminator */ + ALE_FIELDS_MAX, +}; struct cpsw_ale { struct cpsw_ale_params params; struct timer_list timer; + struct regmap *regmap; + struct regmap_field *fields[ALE_FIELDS_MAX]; unsigned long ageout; u32 version; u32 features; @@ -140,5 +193,6 @@ int cpsw_ale_vlan_add_modify(struct cpsw_ale *ale, u16 vid, int port_mask, int cpsw_ale_vlan_del_modify(struct cpsw_ale *ale, u16 vid, int port_mask); void cpsw_ale_set_unreg_mcast(struct cpsw_ale *ale, int unreg_mcast_mask, bool add); +void cpsw_ale_classifier_setup_default(struct cpsw_ale *ale, int num_rx_ch); #endif diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index 53ed23d68722..21d55a180ef6 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -725,8 +725,6 @@ int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *inf SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; info->phc_index = cpsw->cpts->phc_index; info->tx_types = @@ -741,10 +739,7 @@ int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *inf int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; + SOF_TIMESTAMPING_TX_SOFTWARE; info->tx_types = 0; info->rx_filters = 0; return 0; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 2baa198ebfa0..557cc71b9dd2 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1407,7 +1407,8 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) cpsw->slaves[i].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_TC; + ndev->netns_local = true; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 75c294ce6fb6..5d6d1cf78e93 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -53,78 +53,6 @@ #define IEP_CAP_CFG_CAPNR_1ST_EVENT_EN(n) BIT(LATCH_INDEX(n)) #define IEP_CAP_CFG_CAP_ASYNC_EN(n) BIT(LATCH_INDEX(n) + 10) -enum { - ICSS_IEP_GLOBAL_CFG_REG, 
- ICSS_IEP_GLOBAL_STATUS_REG, - ICSS_IEP_COMPEN_REG, - ICSS_IEP_SLOW_COMPEN_REG, - ICSS_IEP_COUNT_REG0, - ICSS_IEP_COUNT_REG1, - ICSS_IEP_CAPTURE_CFG_REG, - ICSS_IEP_CAPTURE_STAT_REG, - - ICSS_IEP_CAP6_RISE_REG0, - ICSS_IEP_CAP6_RISE_REG1, - - ICSS_IEP_CAP7_RISE_REG0, - ICSS_IEP_CAP7_RISE_REG1, - - ICSS_IEP_CMP_CFG_REG, - ICSS_IEP_CMP_STAT_REG, - ICSS_IEP_CMP0_REG0, - ICSS_IEP_CMP0_REG1, - ICSS_IEP_CMP1_REG0, - ICSS_IEP_CMP1_REG1, - - ICSS_IEP_CMP8_REG0, - ICSS_IEP_CMP8_REG1, - ICSS_IEP_SYNC_CTRL_REG, - ICSS_IEP_SYNC0_STAT_REG, - ICSS_IEP_SYNC1_STAT_REG, - ICSS_IEP_SYNC_PWIDTH_REG, - ICSS_IEP_SYNC0_PERIOD_REG, - ICSS_IEP_SYNC1_DELAY_REG, - ICSS_IEP_SYNC_START_REG, - ICSS_IEP_MAX_REGS, -}; - -/** - * struct icss_iep_plat_data - Plat data to handle SoC variants - * @config: Regmap configuration data - * @reg_offs: register offsets to capture offset differences across SoCs - * @flags: Flags to represent IEP properties - */ -struct icss_iep_plat_data { - const struct regmap_config *config; - u32 reg_offs[ICSS_IEP_MAX_REGS]; - u32 flags; -}; - -struct icss_iep { - struct device *dev; - void __iomem *base; - const struct icss_iep_plat_data *plat_data; - struct regmap *map; - struct device_node *client_np; - unsigned long refclk_freq; - int clk_tick_time; /* one refclk tick time in ns */ - struct ptp_clock_info ptp_info; - struct ptp_clock *ptp_clock; - struct mutex ptp_clk_mutex; /* PHC access serializer */ - u32 def_inc; - s16 slow_cmp_inc; - u32 slow_cmp_count; - const struct icss_iep_clockops *ops; - void *clockops_data; - u32 cycle_time_ns; - u32 perout_enabled; - bool pps_enabled; - int cap_cmp_irq; - u64 period; - u32 latch_enable; - struct work_struct work; -}; - /** * icss_iep_get_count_hi() - Get the upper 32 bit IEP counter * @iep: Pointer to structure representing IEP. 
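A quick aside on the cpsw_ale classifier change earlier in this section: the following self-contained C sketch (an illustration only, not part of the series) shows how the pri_thread_map table added in cpsw_ale_classifier_setup_default() resolves an IEEE 802.1D user priority to an RX thread for a given channel count. The table is copied verbatim from the patch; the pri_to_thread() helper and the main() scaffolding are assumed names introduced here for demonstration.

#include <stdio.h>

/* Priority -> thread table from cpsw_ale_classifier_setup_default();
 * row is (number of RX channels - 1), column is the 802.1D priority.
 */
static const int pri_thread_map[8][8] = {
	{ 0, 0, 0, 0, 0, 0, 0, 0 },	/* 1 RX channel */
	{ 0, 0, 0, 0, 1, 1, 1, 1 },	/* 2 RX channels */
	{ 0, 0, 0, 0, 1, 1, 2, 2 },	/* 3 */
	{ 1, 0, 0, 1, 2, 2, 3, 3 },	/* 4 */
	{ 1, 0, 0, 1, 2, 3, 4, 4 },	/* 5 */
	{ 1, 0, 0, 2, 3, 4, 5, 5 },	/* 6 */
	{ 1, 0, 0, 2, 3, 4, 5, 6 },	/* 7 */
	{ 2, 0, 1, 3, 4, 5, 6, 7 },	/* 8 */
};

static int pri_to_thread(int num_rx_ch, int pri)
{
	/* same lookup the driver performs when it programs classifier 'pri' */
	return pri_thread_map[num_rx_ch - 1][pri];
}

int main(void)
{
	int pri;

	/* with 8 channels, priority 0 lands on thread 2 while priorities
	 * 1 and 2 land on threads 0 and 1, i.e. below best-effort traffic
	 */
	for (pri = 0; pri < 8; pri++)
		printf("8 channels: pri %d -> thread %d\n",
		       pri, pri_to_thread(8, pri));
	return 0;
}

Reading row num_rx_ch - 1 keeps priorities 1 and 2 below priority 0 wherever the channel count allows, which is exactly the IEEE 802.1D Table G-2/G-3 traffic-type ordering the patch comment cites.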
diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.h b/drivers/net/ethernet/ti/icssg/icss_iep.h index 803a4b714893..0bdca0155abd 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.h +++ b/drivers/net/ethernet/ti/icssg/icss_iep.h @@ -12,7 +12,78 @@ #include <linux/ptp_clock_kernel.h> #include <linux/regmap.h> -struct icss_iep; +enum { + ICSS_IEP_GLOBAL_CFG_REG, + ICSS_IEP_GLOBAL_STATUS_REG, + ICSS_IEP_COMPEN_REG, + ICSS_IEP_SLOW_COMPEN_REG, + ICSS_IEP_COUNT_REG0, + ICSS_IEP_COUNT_REG1, + ICSS_IEP_CAPTURE_CFG_REG, + ICSS_IEP_CAPTURE_STAT_REG, + + ICSS_IEP_CAP6_RISE_REG0, + ICSS_IEP_CAP6_RISE_REG1, + + ICSS_IEP_CAP7_RISE_REG0, + ICSS_IEP_CAP7_RISE_REG1, + + ICSS_IEP_CMP_CFG_REG, + ICSS_IEP_CMP_STAT_REG, + ICSS_IEP_CMP0_REG0, + ICSS_IEP_CMP0_REG1, + ICSS_IEP_CMP1_REG0, + ICSS_IEP_CMP1_REG1, + + ICSS_IEP_CMP8_REG0, + ICSS_IEP_CMP8_REG1, + ICSS_IEP_SYNC_CTRL_REG, + ICSS_IEP_SYNC0_STAT_REG, + ICSS_IEP_SYNC1_STAT_REG, + ICSS_IEP_SYNC_PWIDTH_REG, + ICSS_IEP_SYNC0_PERIOD_REG, + ICSS_IEP_SYNC1_DELAY_REG, + ICSS_IEP_SYNC_START_REG, + ICSS_IEP_MAX_REGS, +}; + +/** + * struct icss_iep_plat_data - Plat data to handle SoC variants + * @config: Regmap configuration data + * @reg_offs: register offsets to capture offset differences across SoCs + * @flags: Flags to represent IEP properties + */ +struct icss_iep_plat_data { + const struct regmap_config *config; + u32 reg_offs[ICSS_IEP_MAX_REGS]; + u32 flags; +}; + +struct icss_iep { + struct device *dev; + void __iomem *base; + const struct icss_iep_plat_data *plat_data; + struct regmap *map; + struct device_node *client_np; + unsigned long refclk_freq; + int clk_tick_time; /* one refclk tick time in ns */ + struct ptp_clock_info ptp_info; + struct ptp_clock *ptp_clock; + struct mutex ptp_clk_mutex; /* PHC access serializer */ + u32 def_inc; + s16 slow_cmp_inc; + u32 slow_cmp_count; + const struct icss_iep_clockops *ops; + void *clockops_data; + u32 cycle_time_ns; + u32 perout_enabled; + bool pps_enabled; + int cap_cmp_irq; + u64 period; + u32 latch_enable; + struct work_struct work; +}; + extern const struct icss_iep_clockops prueth_iep_clockops; /* Firmware specific clock operations */ diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c index 9ec504d976d6..833ca86d0b71 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c +++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c @@ -290,6 +290,7 @@ void icssg_class_set_host_mac_addr(struct regmap *miig_rt, const u8 *mac) mac[2] << 16 | mac[3] << 24)); regmap_write(miig_rt, MAC_INTERFACE_1, (u32)(mac[4] | mac[5] << 8)); } +EXPORT_SYMBOL_GPL(icssg_class_set_host_mac_addr); void icssg_class_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac) { diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b9d8a93d1680..fdebeb2f84e0 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -660,14 +660,15 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev { struct cppi5_host_desc_t *first_desc, *next_desc, *cur_desc; struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; struct netdev_queue *netif_txq; struct prueth_tx_chn *tx_chn; dma_addr_t desc_dma, buf_dma; + u32 pkt_len, dst_tag_id; int i, ret = 0, q_idx; bool in_tx_ts = 0; int tx_ts_cookie; void **swdata; - u32 pkt_len; u32 *epib; pkt_len = skb_headlen(skb); @@ -712,9 +713,20 @@ enum netdev_tx icssg_ndo_start_xmit(struct 
sk_buff *skb, struct net_device *ndev /* set dst tag to indicate internal qid at the firmware which is at * bit8..bit15. bit0..bit7 indicates port num for directed - * packets in case of switch mode operation + * packets in case of switch mode operation and port num 0 + * for undirected packets in case of HSR offload mode */ - cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8))); + dst_tag_id = emac->port_id | (q_idx << 8); + + if (prueth->is_hsr_offload_mode && + (ndev->features & NETIF_F_HW_HSR_DUP)) + dst_tag_id = PRUETH_UNDIRECTED_PKT_DST_TAG; + + if (prueth->is_hsr_offload_mode && + (ndev->features & NETIF_F_HW_HSR_TAG_INS)) + epib[1] |= PRUETH_UNDIRECTED_PKT_TAG_INS; + + cppi5_desc_set_tags_ids(&first_desc->hdr, 0, dst_tag_id); k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); swdata = cppi5_hdesc_get_swdata(first_desc); diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index dae52a83a378..72ace151d8e9 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -107,7 +107,7 @@ static const struct map hwq_map[2][ICSSG_NUM_OTHER_QUEUES] = { }, }; -static void icssg_config_mii_init_switch(struct prueth_emac *emac) +static void icssg_config_mii_init_fw_offload(struct prueth_emac *emac) { struct prueth *prueth = emac->prueth; int mii = prueth_emac_slice(emac); @@ -278,7 +278,7 @@ static int emac_r30_is_done(struct prueth_emac *emac) return 1; } -static int prueth_switch_buffer_setup(struct prueth_emac *emac) +static int prueth_fw_offload_buffer_setup(struct prueth_emac *emac) { struct icssg_buffer_pool_cfg __iomem *bpool_cfg; struct icssg_rxq_ctx __iomem *rxq_ctx; @@ -424,7 +424,7 @@ static void icssg_init_emac_mode(struct prueth *prueth) icssg_class_set_host_mac_addr(prueth->miig_rt, mac); } -static void icssg_init_switch_mode(struct prueth *prueth) +static void icssg_init_fw_offload_mode(struct prueth *prueth) { u32 addr = prueth->shram.pa + EMAC_ICSSG_SWITCH_DEFAULT_VLAN_TABLE_OFFSET; int i; @@ -455,8 +455,8 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac, int slice) struct icssg_flow_cfg __iomem *flow_cfg; int ret; - if (prueth->is_switch_mode) - icssg_init_switch_mode(prueth); + if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) + icssg_init_fw_offload_mode(prueth); else icssg_init_emac_mode(prueth); @@ -472,8 +472,8 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac, int slice) regmap_update_bits(prueth->miig_rt, ICSSG_CFG_OFFSET, ICSSG_CFG_DEFAULT, ICSSG_CFG_DEFAULT); icssg_miig_set_interface_mode(prueth->miig_rt, slice, emac->phy_if); - if (prueth->is_switch_mode) - icssg_config_mii_init_switch(emac); + if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) + icssg_config_mii_init_fw_offload(emac); else icssg_config_mii_init(emac); icssg_config_ipg(emac); @@ -498,8 +498,8 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac, int slice) writeb(0, config + SPL_PKT_DEFAULT_PRIORITY); writeb(0, config + QUEUE_NUM_UNTAGGED); - if (prueth->is_switch_mode) - ret = prueth_switch_buffer_setup(emac); + if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) + ret = prueth_fw_offload_buffer_setup(emac); else ret = prueth_emac_buffer_setup(emac); if (ret) @@ -531,7 +531,9 @@ static const struct icssg_r30_cmd emac_r32_bitmask[] = { {{EMAC_NONE, 0xffff4000, EMAC_NONE, EMAC_NONE}}, /* Preemption on Tx ENABLE*/ 
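A note on the destination-tag encoding used in the icssg_common.c xmit hunk above: bits 0..7 of the CPPI destination tag carry the port number, bits 8..15 the internal queue id, and in HSR offload mode undirected packets use tag 0 while tag insertion is requested through bit 30 of the second EPIB word. A minimal stand-alone sketch of that packing, using local copies of the PRUETH_UNDIRECTED_* constants that icssg_prueth.h defines later in this series:

    /* Sketch of the dst-tag packing described in the comment above;
     * the constants mirror PRUETH_UNDIRECTED_PKT_DST_TAG and
     * PRUETH_UNDIRECTED_PKT_TAG_INS from icssg_prueth.h.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define UNDIRECTED_PKT_DST_TAG 0
    #define UNDIRECTED_PKT_TAG_INS (1u << 30)

    static uint32_t pack_dst_tag(uint8_t port_id, uint8_t q_idx,
                                 int hsr_dup_offload)
    {
        /* bit0..bit7: port number, bit8..bit15: internal queue id */
        uint32_t tag = port_id | ((uint32_t)q_idx << 8);

        /* HSR duplication offload sends the frame undirected (port 0) */
        if (hsr_dup_offload)
            tag = UNDIRECTED_PKT_DST_TAG;
        return tag;
    }

    int main(void)
    {
        uint32_t epib1 = 0;

        /* HSR tag-insertion offload is flagged in EPIB word 1 */
        epib1 |= UNDIRECTED_PKT_TAG_INS;
        printf("tag=%#x epib1=%#x\n", pack_dst_tag(1, 2, 1), epib1);
        return 0;
    }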
{{EMAC_NONE, 0xbfff0000, EMAC_NONE, EMAC_NONE}}, /* Preemption on Tx DISABLE*/ {{0xffff0010, EMAC_NONE, 0xffff0010, EMAC_NONE}}, /* VLAN AWARE*/ - {{0xffef0000, EMAC_NONE, 0xffef0000, EMAC_NONE}} /* VLAN UNWARE*/ + {{0xffef0000, EMAC_NONE, 0xffef0000, EMAC_NONE}}, /* VLAN UNWARE*/ + {{0xffff2000, EMAC_NONE, EMAC_NONE, EMAC_NONE}}, /* HSR_RX_OFFLOAD_ENABLE */ + {{0xdfff0000, EMAC_NONE, EMAC_NONE, EMAC_NONE}} /* HSR_RX_OFFLOAD_DISABLE */ }; int icssg_set_port_state(struct prueth_emac *emac, diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.h b/drivers/net/ethernet/ti/icssg/icssg_config.h index 1ac60283923b..92c2deaa3068 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.h +++ b/drivers/net/ethernet/ti/icssg/icssg_config.h @@ -80,6 +80,8 @@ enum icssg_port_state_cmd { ICSSG_EMAC_PORT_PREMPT_TX_DISABLE, ICSSG_EMAC_PORT_VLAN_AWARE_ENABLE, ICSSG_EMAC_PORT_VLAN_AWARE_DISABLE, + ICSSG_EMAC_HSR_RX_OFFLOAD_ENABLE, + ICSSG_EMAC_HSR_RX_OFFLOAD_DISABLE, ICSSG_EMAC_PORT_MAX_COMMANDS }; diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c index 5688f054cec5..b715af21d23a 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c +++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c @@ -68,9 +68,13 @@ static int emac_nway_reset(struct net_device *ndev) static int emac_get_sset_count(struct net_device *ndev, int stringset) { + struct prueth_emac *emac = netdev_priv(ndev); switch (stringset) { case ETH_SS_STATS: - return ICSSG_NUM_ETHTOOL_STATS; + if (emac->prueth->pa_stats) + return ICSSG_NUM_ETHTOOL_STATS; + else + return ICSSG_NUM_ETHTOOL_STATS - ICSSG_NUM_PA_STATS; default: return -EOPNOTSUPP; } @@ -78,18 +82,18 @@ static int emac_get_sset_count(struct net_device *ndev, int stringset) static void emac_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { + struct prueth_emac *emac = netdev_priv(ndev); u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(icssg_all_stats); i++) { - if (!icssg_all_stats[i].standard_stats) { - memcpy(p, icssg_all_stats[i].name, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - } + for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) + if (!icssg_all_miig_stats[i].standard_stats) + ethtool_puts(&p, icssg_all_miig_stats[i].name); + if (emac->prueth->pa_stats) + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) + ethtool_puts(&p, icssg_all_pa_stats[i].name); break; default: break; @@ -104,9 +108,13 @@ static void emac_get_ethtool_stats(struct net_device *ndev, emac_update_hardware_stats(emac); - for (i = 0; i < ARRAY_SIZE(icssg_all_stats); i++) - if (!icssg_all_stats[i].standard_stats) + for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) + if (!icssg_all_miig_stats[i].standard_stats) *(data++) = emac->stats[i]; + + if (emac->prueth->pa_stats) + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) + *(data++) = emac->pa_stats[i]; } static int emac_get_ts_info(struct net_device *ndev, @@ -118,8 +126,6 @@ static int emac_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; info->phc_index = icss_iep_get_ptp_clock_idx(emac->iep); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 3e51b3a9b0a5..5fd9902ab181 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -13,6 +13,7 @@ 
#include <linux/dma/ti-cppi5.h> #include <linux/etherdevice.h> #include <linux/genalloc.h> +#include <linux/if_hsr.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> #include <linux/kernel.h> @@ -40,6 +41,11 @@ #define DEFAULT_PORT_MASK 1 #define DEFAULT_UNTAG_MASK 1 +#define NETIF_PRUETH_HSR_OFFLOAD_FEATURES (NETIF_F_HW_HSR_FWD | \ + NETIF_F_HW_HSR_DUP | \ + NETIF_F_HW_HSR_TAG_INS | \ + NETIF_F_HW_HSR_TAG_RM) + /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) @@ -118,6 +124,19 @@ static irqreturn_t prueth_tx_ts_irq(int irq, void *dev_id) return IRQ_HANDLED; } +static struct icssg_firmwares icssg_hsr_firmwares[] = { + { + .pru = "ti-pruss/am65x-sr2-pru0-pruhsr-fw.elf", + .rtu = "ti-pruss/am65x-sr2-rtu0-pruhsr-fw.elf", + .txpru = "ti-pruss/am65x-sr2-txpru0-pruhsr-fw.elf", + }, + { + .pru = "ti-pruss/am65x-sr2-pru1-pruhsr-fw.elf", + .rtu = "ti-pruss/am65x-sr2-rtu1-pruhsr-fw.elf", + .txpru = "ti-pruss/am65x-sr2-txpru1-pruhsr-fw.elf", + } +}; + static struct icssg_firmwares icssg_switch_firmwares[] = { { .pru = "ti-pruss/am65x-sr2-pru0-prusw-fw.elf", @@ -152,6 +171,8 @@ static int prueth_emac_start(struct prueth *prueth, struct prueth_emac *emac) if (prueth->is_switch_mode) firmwares = icssg_switch_firmwares; + else if (prueth->is_hsr_offload_mode) + firmwares = icssg_hsr_firmwares; else firmwares = icssg_emac_firmwares; @@ -365,7 +386,8 @@ static void prueth_iep_settime(void *clockops_data, u64 ns) sc_desc.cyclecounter0_set = cyclecount & GENMASK(31, 0); sc_desc.cyclecounter1_set = (cyclecount & GENMASK(63, 32)) >> 32; sc_desc.iepcount_set = ns % cycletime; - sc_desc.CMP0_current = cycletime - 4; //Count from 0 to (cycle time)-4 + /* Count from 0 to (cycle time) - emac->iep->def_inc */ + sc_desc.CMP0_current = cycletime - emac->iep->def_inc; memcpy_toio(sc_descp, &sc_desc, sizeof(sc_desc)); @@ -470,6 +492,36 @@ static int icssg_prueth_del_mcast(struct net_device *ndev, const u8 *addr) return 0; } +static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + + icssg_fdb_add_del(emac, addr, prueth->default_vlan, + ICSSG_FDB_ENTRY_P0_MEMBERSHIP | + ICSSG_FDB_ENTRY_P1_MEMBERSHIP | + ICSSG_FDB_ENTRY_P2_MEMBERSHIP | + ICSSG_FDB_ENTRY_BLOCK, true); + + icssg_vtbl_modify(emac, emac->port_vlan, BIT(emac->port_id), + BIT(emac->port_id), true); + return 0; +} + +static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + + icssg_fdb_add_del(emac, addr, prueth->default_vlan, + ICSSG_FDB_ENTRY_P0_MEMBERSHIP | + ICSSG_FDB_ENTRY_P1_MEMBERSHIP | + ICSSG_FDB_ENTRY_P2_MEMBERSHIP | + ICSSG_FDB_ENTRY_BLOCK, false); + + return 0; +} + /** * emac_ndo_open - EMAC device open * @ndev: network adapter device @@ -630,7 +682,10 @@ static int emac_ndo_stop(struct net_device *ndev) icssg_class_disable(prueth->miig_rt, prueth_emac_slice(emac)); - __dev_mc_unsync(ndev, icssg_prueth_del_mcast); + if (emac->prueth->is_hsr_offload_mode) + __dev_mc_unsync(ndev, icssg_prueth_hsr_del_mcast); + else + __dev_mc_unsync(ndev, icssg_prueth_del_mcast); atomic_set(&emac->tdown_cnt, emac->tx_ch_num); /* ensure new tdown_cnt value is visible */ @@ -708,7 +763,12 @@ static void emac_ndo_set_rx_mode_work(struct work_struct *work) return; } - __dev_mc_sync(ndev, icssg_prueth_add_mcast, icssg_prueth_del_mcast); + if (emac->prueth->is_hsr_offload_mode) + __dev_mc_sync(ndev, 
icssg_prueth_hsr_add_mcast, + icssg_prueth_hsr_del_mcast); + else + __dev_mc_sync(ndev, icssg_prueth_add_mcast, + icssg_prueth_del_mcast); } /** @@ -725,6 +785,29 @@ static void emac_ndo_set_rx_mode(struct net_device *ndev) queue_work(emac->cmd_wq, &emac->rx_mode_work); } +static netdev_features_t emac_ndo_fix_features(struct net_device *ndev, + netdev_features_t features) +{ + /* hsr tag insertion offload and hsr dup offload are tightly coupled in + * firmware implementation. Both these features need to be enabled / + * disabled together. + */ + if (!(ndev->features & (NETIF_F_HW_HSR_DUP | NETIF_F_HW_HSR_TAG_INS))) + if ((features & NETIF_F_HW_HSR_DUP) || + (features & NETIF_F_HW_HSR_TAG_INS)) + features |= NETIF_F_HW_HSR_DUP | + NETIF_F_HW_HSR_TAG_INS; + + if ((ndev->features & NETIF_F_HW_HSR_DUP) || + (ndev->features & NETIF_F_HW_HSR_TAG_INS)) + if (!(features & NETIF_F_HW_HSR_DUP) || + !(features & NETIF_F_HW_HSR_TAG_INS)) + features &= ~(NETIF_F_HW_HSR_DUP | + NETIF_F_HW_HSR_TAG_INS); + + return features; +} + static const struct net_device_ops emac_netdev_ops = { .ndo_open = emac_ndo_open, .ndo_stop = emac_ndo_stop, @@ -736,6 +819,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_eth_ioctl = icssg_ndo_ioctl, .ndo_get_stats64 = icssg_ndo_get_stats64, .ndo_get_phys_port_name = icssg_ndo_get_phys_port_name, + .ndo_fix_features = emac_ndo_fix_features, }; static int prueth_netdev_init(struct prueth *prueth, @@ -857,12 +941,14 @@ static int prueth_netdev_init(struct prueth *prueth, } ether_addr_copy(emac->mac_addr, ndev->dev_addr); + ndev->dev.of_node = eth_node; ndev->min_mtu = PRUETH_MIN_PKT_SIZE; ndev->max_mtu = PRUETH_MAX_MTU; ndev->netdev_ops = &emac_netdev_ops; ndev->ethtool_ops = &icssg_ethtool_ops; ndev->hw_features = NETIF_F_SG; ndev->features = ndev->hw_features; + ndev->hw_features |= NETIF_PRUETH_HSR_OFFLOAD_FEATURES; netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_init(&emac->rx_hrtimer, CLOCK_MONOTONIC, @@ -951,7 +1037,7 @@ static void prueth_emac_restart(struct prueth *prueth) netif_device_attach(emac1->ndev); } -static void icssg_enable_switch_mode(struct prueth *prueth) +static void icssg_change_mode(struct prueth *prueth) { struct prueth_emac *emac; int mac; @@ -960,6 +1046,13 @@ static void icssg_enable_switch_mode(struct prueth *prueth) for (mac = PRUETH_MAC0; mac < PRUETH_NUM_MACS; mac++) { emac = prueth->emac[mac]; + if (prueth->is_hsr_offload_mode) { + if (emac->ndev->features & NETIF_F_HW_HSR_TAG_RM) + icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_ENABLE); + else + icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_DISABLE); + } + if (netif_running(emac->ndev)) { icssg_fdb_add_del(emac, eth_stp_addr, prueth->default_vlan, ICSSG_FDB_ENTRY_P0_MEMBERSHIP | @@ -971,8 +1064,13 @@ static void icssg_enable_switch_mode(struct prueth *prueth) BIT(emac->port_id) | DEFAULT_PORT_MASK, BIT(emac->port_id) | DEFAULT_UNTAG_MASK, true); + if (prueth->is_hsr_offload_mode) + icssg_vtbl_modify(emac, DEFAULT_VID, + DEFAULT_PORT_MASK, + DEFAULT_UNTAG_MASK, true); icssg_set_pvid(prueth, emac->port_vlan, emac->port_id); - icssg_set_port_state(emac, ICSSG_EMAC_PORT_VLAN_AWARE_ENABLE); + if (prueth->is_switch_mode) + icssg_set_port_state(emac, ICSSG_EMAC_PORT_VLAN_AWARE_ENABLE); } } } @@ -1010,7 +1108,7 @@ static int prueth_netdevice_port_link(struct net_device *ndev, prueth->is_switch_mode = true; prueth->default_vlan = 1; emac->port_vlan = prueth->default_vlan; - icssg_enable_switch_mode(prueth); + icssg_change_mode(prueth); } } @@ -1038,6 
+1136,61 @@ static void prueth_netdevice_port_unlink(struct net_device *ndev) prueth->hw_bridge_dev = NULL; } +static int prueth_hsr_port_link(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + struct prueth_emac *emac0; + struct prueth_emac *emac1; + + emac0 = prueth->emac[PRUETH_MAC0]; + emac1 = prueth->emac[PRUETH_MAC1]; + + if (prueth->is_switch_mode) + return -EOPNOTSUPP; + + prueth->hsr_members |= BIT(emac->port_id); + if (!prueth->is_hsr_offload_mode) { + if (prueth->hsr_members & BIT(PRUETH_PORT_MII0) && + prueth->hsr_members & BIT(PRUETH_PORT_MII1)) { + if (!(emac0->ndev->features & + NETIF_PRUETH_HSR_OFFLOAD_FEATURES) && + !(emac1->ndev->features & + NETIF_PRUETH_HSR_OFFLOAD_FEATURES)) + return -EOPNOTSUPP; + prueth->is_hsr_offload_mode = true; + prueth->default_vlan = 1; + emac0->port_vlan = prueth->default_vlan; + emac1->port_vlan = prueth->default_vlan; + icssg_change_mode(prueth); + netdev_dbg(ndev, "Enabling HSR offload mode\n"); + } + } + + return 0; +} + +static void prueth_hsr_port_unlink(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + struct prueth_emac *emac0; + struct prueth_emac *emac1; + + emac0 = prueth->emac[PRUETH_MAC0]; + emac1 = prueth->emac[PRUETH_MAC1]; + + prueth->hsr_members &= ~BIT(emac->port_id); + if (prueth->is_hsr_offload_mode) { + prueth->is_hsr_offload_mode = false; + emac0->port_vlan = 0; + emac1->port_vlan = 0; + prueth->hsr_dev = NULL; + prueth_emac_restart(prueth); + netdev_dbg(ndev, "Disabling HSR Offload mode\n"); + } +} + /* netdev notifier */ static int prueth_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) @@ -1045,6 +1198,8 @@ static int prueth_netdevice_event(struct notifier_block *unused, struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; int ret = NOTIFY_DONE; if (ndev->netdev_ops != &emac_netdev_ops) @@ -1054,6 +1209,25 @@ static int prueth_netdevice_event(struct notifier_block *unused, case NETDEV_CHANGEUPPER: info = ptr; + if ((ndev->features & NETIF_PRUETH_HSR_OFFLOAD_FEATURES) && + is_hsr_master(info->upper_dev)) { + if (info->linking) { + if (!prueth->hsr_dev) { + prueth->hsr_dev = info->upper_dev; + icssg_class_set_host_mac_addr(prueth->miig_rt, + prueth->hsr_dev->dev_addr); + } else { + if (prueth->hsr_dev != info->upper_dev) { + netdev_dbg(ndev, "Both interfaces must be linked to same upper device\n"); + return -EOPNOTSUPP; + } + } + prueth_hsr_port_link(ndev); + } else { + prueth_hsr_port_unlink(ndev); + } + } + if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) ret = prueth_netdevice_port_link(ndev, info->upper_dev, extack); @@ -1181,6 +1355,12 @@ static int prueth_probe(struct platform_device *pdev) return -ENODEV; } + prueth->pa_stats = syscon_regmap_lookup_by_phandle(np, "ti,pa-stats"); + if (IS_ERR(prueth->pa_stats)) { + dev_err(dev, "couldn't get ti,pa-stats syscon regmap\n"); + prueth->pa_stats = NULL; + } + if (eth0_node) { ret = prueth_get_cores(prueth, ICSS_SLICE0, false); if (ret) @@ -1271,8 +1451,8 @@ static int prueth_probe(struct platform_device *pdev) goto exit_iep; } - if (of_find_property(eth0_node, "ti,half-duplex-capable", NULL)) - prueth->emac[PRUETH_MAC0]->half_duplex = 1; + prueth->emac[PRUETH_MAC0]->half_duplex = + 
of_property_read_bool(eth0_node, "ti,half-duplex-capable"); prueth->emac[PRUETH_MAC0]->iep = prueth->iep0; } @@ -1285,8 +1465,8 @@ static int prueth_probe(struct platform_device *pdev) goto netdev_exit; } - if (of_find_property(eth1_node, "ti,half-duplex-capable", NULL)) - prueth->emac[PRUETH_MAC1]->half_duplex = 1; + prueth->emac[PRUETH_MAC1]->half_duplex = + of_property_read_bool(eth1_node, "ti,half-duplex-capable"); prueth->emac[PRUETH_MAC1]->iep = prueth->iep0; } @@ -1452,6 +1632,7 @@ static const struct prueth_pdata am654_icssg_pdata = { static const struct prueth_pdata am64x_icssg_pdata = { .fdqring_mode = K3_RINGACC_RING_MODE_RING, + .quirk_10m_link_issue = 1, .switch_mode = 1, }; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index f678d656a3ed..bba6da2e6bd8 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -50,13 +50,18 @@ #define ICSSG_MAX_RFLOWS 8 /* per slice */ +#define ICSSG_NUM_PA_STATS 4 +#define ICSSG_NUM_MIIG_STATS 60 /* Number of ICSSG related stats */ -#define ICSSG_NUM_STATS 60 +#define ICSSG_NUM_STATS (ICSSG_NUM_MIIG_STATS + ICSSG_NUM_PA_STATS) #define ICSSG_NUM_STANDARD_STATS 31 #define ICSSG_NUM_ETHTOOL_STATS (ICSSG_NUM_STATS - ICSSG_NUM_STANDARD_STATS) #define IEP_DEFAULT_CYCLE_TIME_NS 1000000 /* 1 ms */ +#define PRUETH_UNDIRECTED_PKT_DST_TAG 0 +#define PRUETH_UNDIRECTED_PKT_TAG_INS BIT(30) + /* Firmware status codes */ #define ICSS_HS_FW_READY 0x55555555 #define ICSS_HS_FW_DEAD 0xDEAD0000 /* lower 16 bits contain error code */ @@ -190,7 +195,8 @@ struct prueth_emac { int port_vlan; struct delayed_work stats_work; - u64 stats[ICSSG_NUM_STATS]; + u64 stats[ICSSG_NUM_MIIG_STATS]; + u64 pa_stats[ICSSG_NUM_PA_STATS]; /* RX IRQ Coalescing Related */ struct hrtimer rx_hrtimer; @@ -230,6 +236,7 @@ struct icssg_firmwares { * @registered_netdevs: list of registered netdevs * @miig_rt: regmap to mii_g_rt block * @mii_rt: regmap to mii_rt block + * @pa_stats: regmap to pa_stats block * @pru_id: ID for each of the PRUs * @pdev: pointer to ICSSG platform device * @pdata: pointer to platform data for ICSSG driver @@ -239,11 +246,14 @@ struct icssg_firmwares { * @iep1: pointer to IEP1 device * @vlan_tbl: VLAN-FID table pointer * @hw_bridge_dev: pointer to HW bridge net device + * @hsr_dev: pointer to the HSR net device * @br_members: bitmask of bridge member ports + * @hsr_members: bitmask of hsr member ports * @prueth_netdevice_nb: netdevice notifier block * @prueth_switchdev_nb: switchdev notifier block * @prueth_switchdev_bl_nb: switchdev blocking notifier block * @is_switch_mode: flag to indicate if device is in Switch mode + * @is_hsr_offload_mode: flag to indicate if device is in hsr offload mode * @is_switchmode_supported: indicates platform support for switch mode * @switch_id: ID for mapping switch ports to bridge * @default_vlan: Default VLAN for host @@ -263,6 +273,7 @@ struct prueth { struct net_device *registered_netdevs[PRUETH_NUM_MACS]; struct regmap *miig_rt; struct regmap *mii_rt; + struct regmap *pa_stats; enum pruss_pru_id pru_id[PRUSS_NUM_PRUS]; struct platform_device *pdev; @@ -274,11 +285,14 @@ struct prueth { struct prueth_vlan_tbl *vlan_tbl; struct net_device *hw_bridge_dev; + struct net_device *hsr_dev; u8 br_members; + u8 hsr_members; struct notifier_block prueth_netdevice_nb; struct notifier_block prueth_switchdev_nb; struct notifier_block prueth_switchdev_bl_nb; bool is_switch_mode; + bool is_hsr_offload_mode; bool 
is_switchmode_supported; unsigned char switch_id[MAX_PHYS_ITEM_ID_LEN]; int default_vlan; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c index e180c1166170..292f04d29f4f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c @@ -847,6 +847,7 @@ static int prueth_netdev_init(struct prueth *prueth, } ether_addr_copy(emac->mac_addr, ndev->dev_addr); + ndev->dev.of_node = eth_node; ndev->min_mtu = PRUETH_MIN_PKT_SIZE; ndev->max_mtu = PRUETH_MAX_MTU; ndev->netdev_ops = &emac_netdev_ops; @@ -1045,8 +1046,8 @@ static int prueth_probe(struct platform_device *pdev) goto exit_iep; } - if (of_find_property(eth0_node, "ti,half-duplex-capable", NULL)) - prueth->emac[PRUETH_MAC0]->half_duplex = 1; + prueth->emac[PRUETH_MAC0]->half_duplex = + of_property_read_bool(eth0_node, "ti,half-duplex-capable"); prueth->emac[PRUETH_MAC0]->iep = prueth->iep0; } @@ -1059,8 +1060,8 @@ static int prueth_probe(struct platform_device *pdev) goto netdev_exit; } - if (of_find_property(eth1_node, "ti,half-duplex-capable", NULL)) - prueth->emac[PRUETH_MAC1]->half_duplex = 1; + prueth->emac[PRUETH_MAC1]->half_duplex = + of_property_read_bool(eth1_node, "ti,half-duplex-capable"); prueth->emac[PRUETH_MAC1]->iep = prueth->iep1; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index 2fb150c13078..8800bd3a8d07 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -11,6 +11,7 @@ #define ICSSG_TX_PACKET_OFFSET 0xA0 #define ICSSG_TX_BYTE_OFFSET 0xEC +#define ICSSG_FW_STATS_BASE 0x0248 static u32 stats_base[] = { 0x54c, /* Slice 0 stats start */ 0xb18, /* Slice 1 stats start */ @@ -22,24 +23,34 @@ void emac_update_hardware_stats(struct prueth_emac *emac) int slice = prueth_emac_slice(emac); u32 base = stats_base[slice]; u32 tx_pkt_cnt = 0; - u32 val; + u32 val, reg; int i; - for (i = 0; i < ARRAY_SIZE(icssg_all_stats); i++) { + for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) { regmap_read(prueth->miig_rt, - base + icssg_all_stats[i].offset, + base + icssg_all_miig_stats[i].offset, &val); regmap_write(prueth->miig_rt, - base + icssg_all_stats[i].offset, + base + icssg_all_miig_stats[i].offset, val); - if (icssg_all_stats[i].offset == ICSSG_TX_PACKET_OFFSET) + if (icssg_all_miig_stats[i].offset == ICSSG_TX_PACKET_OFFSET) tx_pkt_cnt = val; emac->stats[i] += val; - if (icssg_all_stats[i].offset == ICSSG_TX_BYTE_OFFSET) + if (icssg_all_miig_stats[i].offset == ICSSG_TX_BYTE_OFFSET) emac->stats[i] -= tx_pkt_cnt * 8; } + + if (prueth->pa_stats) { + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { + reg = ICSSG_FW_STATS_BASE + + icssg_all_pa_stats[i].offset * + PRUETH_NUM_MACS + slice * sizeof(u32); + regmap_read(prueth->pa_stats, reg, &val); + emac->pa_stats[i] += val; + } + } } void icssg_stats_work_handler(struct work_struct *work) @@ -57,9 +68,16 @@ int emac_get_stat_by_name(struct prueth_emac *emac, char *stat_name) { int i; - for (i = 0; i < ARRAY_SIZE(icssg_all_stats); i++) { - if (!strcmp(icssg_all_stats[i].name, stat_name)) - return emac->stats[icssg_all_stats[i].offset / sizeof(u32)]; + for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) { + if (!strcmp(icssg_all_miig_stats[i].name, stat_name)) + return emac->stats[icssg_all_miig_stats[i].offset / sizeof(u32)]; + } + + if (emac->prueth->pa_stats) { + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { + if 
(!strcmp(icssg_all_pa_stats[i].name, stat_name)) + return emac->pa_stats[icssg_all_pa_stats[i].offset / sizeof(u32)]; + } } netdev_err(emac->ndev, "Invalid stats %s\n", stat_name); diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.h b/drivers/net/ethernet/ti/icssg/icssg_stats.h index 999a4a91276c..e88b919f532c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.h +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.h @@ -77,82 +77,114 @@ struct miig_stats_regs { u32 tx_bytes; }; -#define ICSSG_STATS(field, stats_type) \ +#define ICSSG_MIIG_STATS(field, stats_type) \ { \ #field, \ offsetof(struct miig_stats_regs, field), \ stats_type \ } -struct icssg_stats { +struct icssg_miig_stats { char name[ETH_GSTRING_LEN]; u32 offset; bool standard_stats; }; -static const struct icssg_stats icssg_all_stats[] = { +static const struct icssg_miig_stats icssg_all_miig_stats[] = { /* Rx */ - ICSSG_STATS(rx_packets, true), - ICSSG_STATS(rx_broadcast_frames, false), - ICSSG_STATS(rx_multicast_frames, true), - ICSSG_STATS(rx_crc_errors, true), - ICSSG_STATS(rx_mii_error_frames, false), - ICSSG_STATS(rx_odd_nibble_frames, false), - ICSSG_STATS(rx_frame_max_size, true), - ICSSG_STATS(rx_max_size_error_frames, false), - ICSSG_STATS(rx_frame_min_size, true), - ICSSG_STATS(rx_min_size_error_frames, false), - ICSSG_STATS(rx_over_errors, true), - ICSSG_STATS(rx_class0_hits, false), - ICSSG_STATS(rx_class1_hits, false), - ICSSG_STATS(rx_class2_hits, false), - ICSSG_STATS(rx_class3_hits, false), - ICSSG_STATS(rx_class4_hits, false), - ICSSG_STATS(rx_class5_hits, false), - ICSSG_STATS(rx_class6_hits, false), - ICSSG_STATS(rx_class7_hits, false), - ICSSG_STATS(rx_class8_hits, false), - ICSSG_STATS(rx_class9_hits, false), - ICSSG_STATS(rx_class10_hits, false), - ICSSG_STATS(rx_class11_hits, false), - ICSSG_STATS(rx_class12_hits, false), - ICSSG_STATS(rx_class13_hits, false), - ICSSG_STATS(rx_class14_hits, false), - ICSSG_STATS(rx_class15_hits, false), - ICSSG_STATS(rx_smd_frags, false), - ICSSG_STATS(rx_bucket1_size, true), - ICSSG_STATS(rx_bucket2_size, true), - ICSSG_STATS(rx_bucket3_size, true), - ICSSG_STATS(rx_bucket4_size, true), - ICSSG_STATS(rx_64B_frames, true), - ICSSG_STATS(rx_bucket1_frames, true), - ICSSG_STATS(rx_bucket2_frames, true), - ICSSG_STATS(rx_bucket3_frames, true), - ICSSG_STATS(rx_bucket4_frames, true), - ICSSG_STATS(rx_bucket5_frames, true), - ICSSG_STATS(rx_bytes, true), - ICSSG_STATS(rx_tx_total_bytes, false), + ICSSG_MIIG_STATS(rx_packets, true), + ICSSG_MIIG_STATS(rx_broadcast_frames, false), + ICSSG_MIIG_STATS(rx_multicast_frames, true), + ICSSG_MIIG_STATS(rx_crc_errors, true), + ICSSG_MIIG_STATS(rx_mii_error_frames, false), + ICSSG_MIIG_STATS(rx_odd_nibble_frames, false), + ICSSG_MIIG_STATS(rx_frame_max_size, true), + ICSSG_MIIG_STATS(rx_max_size_error_frames, false), + ICSSG_MIIG_STATS(rx_frame_min_size, true), + ICSSG_MIIG_STATS(rx_min_size_error_frames, false), + ICSSG_MIIG_STATS(rx_over_errors, true), + ICSSG_MIIG_STATS(rx_class0_hits, false), + ICSSG_MIIG_STATS(rx_class1_hits, false), + ICSSG_MIIG_STATS(rx_class2_hits, false), + ICSSG_MIIG_STATS(rx_class3_hits, false), + ICSSG_MIIG_STATS(rx_class4_hits, false), + ICSSG_MIIG_STATS(rx_class5_hits, false), + ICSSG_MIIG_STATS(rx_class6_hits, false), + ICSSG_MIIG_STATS(rx_class7_hits, false), + ICSSG_MIIG_STATS(rx_class8_hits, false), + ICSSG_MIIG_STATS(rx_class9_hits, false), + ICSSG_MIIG_STATS(rx_class10_hits, false), + ICSSG_MIIG_STATS(rx_class11_hits, false), + ICSSG_MIIG_STATS(rx_class12_hits, false), + 
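On the firmware (PA) statistics read in emac_update_hardware_stats() above: the counters of struct pa_stats_regs (defined further down in icssg_stats.h) are interleaved per slice, one u32 per MAC, so the register address works out to base + field_offset * PRUETH_NUM_MACS + slice * sizeof(u32). A small sketch of that address computation, outside the regmap API and with illustrative values only:

    /* Address computation for the per-slice PA stats, following the
     * emac_update_hardware_stats() hunk above.
     */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FW_STATS_BASE 0x0248
    #define NUM_MACS      2

    struct pa_stats_regs {
        uint32_t fw_rx_cnt;
        uint32_t fw_tx_cnt;
        uint32_t fw_tx_pre_overflow;
        uint32_t fw_tx_exp_overflow;
    };

    static uint32_t pa_stat_addr(size_t field_offset, int slice)
    {
        /* counters for slice 0 and slice 1 sit in adjacent u32s */
        return FW_STATS_BASE + field_offset * NUM_MACS +
               slice * sizeof(uint32_t);
    }

    int main(void)
    {
        printf("fw_tx_cnt, slice 1 -> %#x\n",
               (unsigned)pa_stat_addr(offsetof(struct pa_stats_regs,
                                               fw_tx_cnt), 1));
        return 0;
    }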
ICSSG_MIIG_STATS(rx_class13_hits, false), + ICSSG_MIIG_STATS(rx_class14_hits, false), + ICSSG_MIIG_STATS(rx_class15_hits, false), + ICSSG_MIIG_STATS(rx_smd_frags, false), + ICSSG_MIIG_STATS(rx_bucket1_size, true), + ICSSG_MIIG_STATS(rx_bucket2_size, true), + ICSSG_MIIG_STATS(rx_bucket3_size, true), + ICSSG_MIIG_STATS(rx_bucket4_size, true), + ICSSG_MIIG_STATS(rx_64B_frames, true), + ICSSG_MIIG_STATS(rx_bucket1_frames, true), + ICSSG_MIIG_STATS(rx_bucket2_frames, true), + ICSSG_MIIG_STATS(rx_bucket3_frames, true), + ICSSG_MIIG_STATS(rx_bucket4_frames, true), + ICSSG_MIIG_STATS(rx_bucket5_frames, true), + ICSSG_MIIG_STATS(rx_bytes, true), + ICSSG_MIIG_STATS(rx_tx_total_bytes, false), /* Tx */ - ICSSG_STATS(tx_packets, true), - ICSSG_STATS(tx_broadcast_frames, false), - ICSSG_STATS(tx_multicast_frames, false), - ICSSG_STATS(tx_odd_nibble_frames, false), - ICSSG_STATS(tx_underflow_errors, false), - ICSSG_STATS(tx_frame_max_size, true), - ICSSG_STATS(tx_max_size_error_frames, false), - ICSSG_STATS(tx_frame_min_size, true), - ICSSG_STATS(tx_min_size_error_frames, false), - ICSSG_STATS(tx_bucket1_size, true), - ICSSG_STATS(tx_bucket2_size, true), - ICSSG_STATS(tx_bucket3_size, true), - ICSSG_STATS(tx_bucket4_size, true), - ICSSG_STATS(tx_64B_frames, true), - ICSSG_STATS(tx_bucket1_frames, true), - ICSSG_STATS(tx_bucket2_frames, true), - ICSSG_STATS(tx_bucket3_frames, true), - ICSSG_STATS(tx_bucket4_frames, true), - ICSSG_STATS(tx_bucket5_frames, true), - ICSSG_STATS(tx_bytes, true), + ICSSG_MIIG_STATS(tx_packets, true), + ICSSG_MIIG_STATS(tx_broadcast_frames, false), + ICSSG_MIIG_STATS(tx_multicast_frames, false), + ICSSG_MIIG_STATS(tx_odd_nibble_frames, false), + ICSSG_MIIG_STATS(tx_underflow_errors, false), + ICSSG_MIIG_STATS(tx_frame_max_size, true), + ICSSG_MIIG_STATS(tx_max_size_error_frames, false), + ICSSG_MIIG_STATS(tx_frame_min_size, true), + ICSSG_MIIG_STATS(tx_min_size_error_frames, false), + ICSSG_MIIG_STATS(tx_bucket1_size, true), + ICSSG_MIIG_STATS(tx_bucket2_size, true), + ICSSG_MIIG_STATS(tx_bucket3_size, true), + ICSSG_MIIG_STATS(tx_bucket4_size, true), + ICSSG_MIIG_STATS(tx_64B_frames, true), + ICSSG_MIIG_STATS(tx_bucket1_frames, true), + ICSSG_MIIG_STATS(tx_bucket2_frames, true), + ICSSG_MIIG_STATS(tx_bucket3_frames, true), + ICSSG_MIIG_STATS(tx_bucket4_frames, true), + ICSSG_MIIG_STATS(tx_bucket5_frames, true), + ICSSG_MIIG_STATS(tx_bytes, true), +}; + +/** + * struct pa_stats_regs - ICSSG Firmware maintained PA Stats register + * @fw_rx_cnt: Number of valid packets sent by Rx PRU to Host on PSI + * @fw_tx_cnt: Number of valid packets copied by RTU0 to Tx queues + * @fw_tx_pre_overflow: Host Egress Q (Pre-emptible) Overflow Counter + * @fw_tx_exp_overflow: Host Egress Q (Express) Overflow Counter + */ +struct pa_stats_regs { + u32 fw_rx_cnt; + u32 fw_tx_cnt; + u32 fw_tx_pre_overflow; + u32 fw_tx_exp_overflow; +}; + +#define ICSSG_PA_STATS(field) \ +{ \ + #field, \ + offsetof(struct pa_stats_regs, field), \ +} + +struct icssg_pa_stats { + char name[ETH_GSTRING_LEN]; + u32 offset; +}; + +static const struct icssg_pa_stats icssg_all_pa_stats[] = { + ICSSG_PA_STATS(fw_rx_cnt), + ICSSG_PA_STATS(fw_tx_cnt), + ICSSG_PA_STATS(fw_tx_pre_overflow), + ICSSG_PA_STATS(fw_tx_exp_overflow), }; #endif /* __NET_TI_ICSSG_STATS_H */ diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index d286709ca3b9..63e686f0b119 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2012,8 +2012,6 @@ static int 
keystone_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; info->phc_index = gbe_intf->gbe_dev->cpts->phc_index; info->tx_types = @@ -2030,10 +2028,7 @@ static int keystone_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; + SOF_TIMESTAMPING_TX_SOFTWARE; info->tx_types = 0; info->rx_filters = 0; return 0; diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 87e67121477c..a4937c18d7cb 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2277,10 +2277,11 @@ spider_net_setup_netdev(struct spider_net_card *card) netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM; if (SPIDER_NET_RX_CSUM_DEFAULT) netdev->features |= NETIF_F_RXCSUM; - netdev->features |= NETIF_F_IP_CSUM | NETIF_F_LLTX; + netdev->features |= NETIF_F_IP_CSUM; /* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | * NETIF_F_HW_VLAN_CTAG_FILTER */ + netdev->lltx = true; /* MTU range: 64 - 2294 */ netdev->min_mtu = SPIDER_NET_MIN_MTU; diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index edd8b59680e5..a04d4073def9 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -377,8 +377,8 @@ static int mse102x_tx_pkt_spi(struct mse102x_net *mse, struct sk_buff *txb, int ret; bool first = true; - if (txb->len < 60) - pad = 60 - txb->len; + if (txb->len < ETH_ZLEN) + pad = ETH_ZLEN - txb->len; while (1) { mse102x_tx_cmd_spi(mse, CMD_RTS | (txb->len + pad)); @@ -451,7 +451,7 @@ static void mse102x_tx_work(struct work_struct *work) if (ret == -ETIMEDOUT) { if (netif_msg_timer(mse)) - netdev_err(mse->ndev, "tx work timeout\n"); + netdev_err_once(mse->ndev, "tx work timeout\n"); mse->stats.tx_timeout++; } @@ -485,8 +485,8 @@ static void mse102x_init_mac(struct mse102x_net *mse, struct device_node *np) if (ret) { eth_hw_addr_random(ndev); - netdev_err(ndev, "Using random MAC address: %pM\n", - ndev->dev_addr); + dev_warn(ndev->dev.parent, "Using random MAC address: %pM\n", + ndev->dev_addr); } } @@ -622,8 +622,6 @@ static const struct ethtool_ops mse102x_ethtool_ops = { /* driver bus management functions */ -#ifdef CONFIG_PM_SLEEP - static int mse102x_suspend(struct device *dev) { struct mse102x_net *mse = dev_get_drvdata(dev); @@ -649,9 +647,8 @@ static int mse102x_resume(struct device *dev) return 0; } -#endif -static SIMPLE_DEV_PM_OPS(mse102x_pm_ops, mse102x_suspend, mse102x_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(mse102x_pm_ops, mse102x_suspend, mse102x_resume); static int mse102x_probe_spi(struct spi_device *spi) { @@ -736,9 +733,6 @@ static void mse102x_remove_spi(struct spi_device *spi) struct mse102x_net *mse = dev_get_drvdata(&spi->dev); struct mse102x_net_spi *mses = to_mse102x_spi(mse); - if (netif_msg_drv(mse)) - dev_info(&spi->dev, "remove\n"); - mse102x_remove_device_debugfs(mses); unregister_netdev(mse->ndev); } @@ -761,7 +755,7 @@ static struct spi_driver mse102x_driver = { .driver = { .name = DRV_NAME, .of_match_table = mse102x_match_table, - .pm = &mse102x_pm_ops, + .pm = pm_sleep_ptr(&mse102x_pm_ops), }, .probe = mse102x_probe_spi, .remove = mse102x_remove_spi, diff --git 
a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 85cdbdd44fec..e46ccebcfd22 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -41,10 +41,9 @@ config TXGBE tristate "Wangxun(R) 10GbE PCI Express adapters support" depends on PCI depends on COMMON_CLK + depends on I2C_DESIGNWARE_PLATFORM select MARVELL_10G_PHY select REGMAP - select I2C - select I2C_DESIGNWARE_PLATFORM select PHYLINK select HWMON if TXGBE=y select SFP diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 1eecba984f3b..2b3d6586f44a 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -251,10 +251,7 @@ static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring, rx_buffer->page_offset; /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + net_prefetch(page_addr); /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, WX_RXBUFFER_256); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 1d57b047817b..b54bffda027b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -426,9 +426,9 @@ enum WX_MSCA_CMD_value { #define WX_MIN_RXD 128 #define WX_MIN_TXD 128 -/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ -#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 8 -#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 8 +/* Number of Transmit and Receive Descriptors must be a multiple of 128 */ +#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 128 +#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 128 #define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */ #define VMDQ_P(p) p diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index ec54b18c5fe7..a5e9b779c44d 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -124,8 +124,12 @@ static int ngbe_phylink_init(struct wx *wx) MAC_SYM_PAUSE | MAC_ASYM_PAUSE; config->mac_managed_pm = true; - phy_mode = PHY_INTERFACE_MODE_RGMII_ID; - __set_bit(PHY_INTERFACE_MODE_RGMII_ID, config->supported_interfaces); + /* The MAC only has add the Tx delay and it can not be modified. + * So just disable TX delay in PHY, and it is does not matter to + * internal phy. 
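On the wx_type.h change above: raising WX_REQ_*_DESCRIPTOR_MULTIPLE to 128 means user-requested ring sizes must be clamped to the min/max bounds and rounded to a multiple of 128 before programming the hardware. A hedged sketch of how such a request would typically be normalized; the helper name and the WX_MAX_RXD value are assumptions for illustration:

    /* Hypothetical helper showing how a requested ring size is clamped
     * and rounded to the descriptor multiple from wx_type.h.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define WX_MIN_RXD 128
    #define WX_MAX_RXD 8192 /* assumed bound, for illustration */
    #define WX_REQ_RX_DESCRIPTOR_MULTIPLE 128

    static uint32_t wx_normalize_rxd(uint32_t requested)
    {
        uint32_t n = requested;

        if (n < WX_MIN_RXD)
            n = WX_MIN_RXD;
        if (n > WX_MAX_RXD)
            n = WX_MAX_RXD;
        /* round down to the required multiple */
        n -= n % WX_REQ_RX_DESCRIPTOR_MULTIPLE;
        return n;
    }

    int main(void)
    {
        printf("%u -> %u\n", 1000u, wx_normalize_rxd(1000));
        return 0;
    }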
+ */ + phy_mode = PHY_INTERFACE_MODE_RGMII_RXID; + __set_bit(PHY_INTERFACE_MODE_RGMII_RXID, config->supported_interfaces); phylink = phylink_create(config, NULL, phy_mode, &ngbe_mac_ops); if (IS_ERR(phylink)) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index d6b2b3c781b6..cd1372da92a9 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -103,8 +103,7 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) *checksum += local_buffer[i]; - if (eeprom_ptrs) - kvfree(eeprom_ptrs); + kvfree(eeprom_ptrs); *checksum = TXGBE_EEPROM_SUM - *checksum; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 5f502265f0a6..67b61afdde96 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -688,8 +688,7 @@ static int txgbe_ext_phy_init(struct txgbe *txgbe) mii_bus->parent = &pdev->dev; mii_bus->phy_mask = GENMASK(31, 1); mii_bus->priv = wx; - snprintf(mii_bus->id, MII_BUS_ID_SIZE, "txgbe-%x", - (pdev->bus->number << 8) | pdev->devfn); + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "txgbe-%x", pci_dev_id(pdev)); ret = devm_mdiobus_register(&pdev->dev, mii_bus); if (ret) { diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index fa5500decc96..d64b8abcf018 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -29,26 +29,26 @@ /* Configuration options */ /* Accept all incoming packets. Default: disabled (cleared) */ -#define XAE_OPTION_PROMISC (1 << 0) +#define XAE_OPTION_PROMISC BIT(0) /* Jumbo frame support for Tx & Rx. Default: disabled (cleared) */ -#define XAE_OPTION_JUMBO (1 << 1) +#define XAE_OPTION_JUMBO BIT(1) /* VLAN Rx & Tx frame support. Default: disabled (cleared) */ -#define XAE_OPTION_VLAN (1 << 2) +#define XAE_OPTION_VLAN BIT(2) /* Enable recognition of flow control frames on Rx. Default: enabled (set) */ -#define XAE_OPTION_FLOW_CONTROL (1 << 4) +#define XAE_OPTION_FLOW_CONTROL BIT(4) /* Strip FCS and PAD from incoming frames. Note: PAD from VLAN frames is not * stripped. Default: disabled (set) */ -#define XAE_OPTION_FCS_STRIP (1 << 5) +#define XAE_OPTION_FCS_STRIP BIT(5) /* Generate FCS field and add PAD automatically for outgoing frames. * Default: enabled (set) */ -#define XAE_OPTION_FCS_INSERT (1 << 6) +#define XAE_OPTION_FCS_INSERT BIT(6) /* Enable Length/Type error checking for incoming frames. When this option is * set, the MAC will filter frames that have a mismatched type/length field @@ -56,13 +56,13 @@ * types of frames are encountered. When this option is cleared, the MAC will * allow these types of frames to be received. Default: enabled (set) */ -#define XAE_OPTION_LENTYPE_ERR (1 << 7) +#define XAE_OPTION_LENTYPE_ERR BIT(7) /* Enable the transmitter. Default: enabled (set) */ -#define XAE_OPTION_TXEN (1 << 11) +#define XAE_OPTION_TXEN BIT(11) /* Enable the receiver. 
Default: enabled (set) */ -#define XAE_OPTION_RXEN (1 << 12) +#define XAE_OPTION_RXEN BIT(12) /* Default options set when device is initialized or reset */ #define XAE_OPTION_DEFAULTS \ @@ -156,22 +156,27 @@ #define XAE_TPID0_OFFSET 0x00000028 /* VLAN TPID0 register */ #define XAE_TPID1_OFFSET 0x0000002C /* VLAN TPID1 register */ #define XAE_PPST_OFFSET 0x00000030 /* PCS PMA Soft Temac Status Reg */ +#define XAE_STATS_OFFSET 0x00000200 /* Statistics counters */ #define XAE_RCW0_OFFSET 0x00000400 /* Rx Configuration Word 0 */ #define XAE_RCW1_OFFSET 0x00000404 /* Rx Configuration Word 1 */ #define XAE_TC_OFFSET 0x00000408 /* Tx Configuration */ #define XAE_FCC_OFFSET 0x0000040C /* Flow Control Configuration */ -#define XAE_EMMC_OFFSET 0x00000410 /* EMAC mode configuration */ -#define XAE_PHYC_OFFSET 0x00000414 /* RGMII/SGMII configuration */ +#define XAE_EMMC_OFFSET 0x00000410 /* MAC speed configuration */ +#define XAE_PHYC_OFFSET 0x00000414 /* RX Max Frame Configuration */ #define XAE_ID_OFFSET 0x000004F8 /* Identification register */ -#define XAE_MDIO_MC_OFFSET 0x00000500 /* MII Management Config */ -#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MII Management Control */ -#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ -#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ +#define XAE_ABILITY_OFFSET 0x000004FC /* Ability Register offset */ +#define XAE_MDIO_MC_OFFSET 0x00000500 /* MDIO Setup */ +#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MDIO Control */ +#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MDIO Write Data */ +#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MDIO Read Data */ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ -#define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ +#define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ +#define XAE_FFE_OFFSET 0x0000070C /* Frame Filter Enable */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ +#define XAE_AM0_OFFSET 0x00000750 /* Frame Filter Mask Value Bytes 3-0 */ +#define XAE_AM1_OFFSET 0x00000754 /* Frame Filter Mask Value Bytes 7-4 */ #define XAE_TX_VLAN_DATA_OFFSET 0x00004000 /* TX VLAN data table address */ #define XAE_RX_VLAN_DATA_OFFSET 0x00008000 /* RX VLAN data table address */ @@ -283,6 +288,16 @@ #define XAE_PHYC_SGLINKSPD_100 0x40000000 /* SGMII link 100 Mbit */ #define XAE_PHYC_SGLINKSPD_1000 0x80000000 /* SGMII link 1000 Mbit */ +/* Bit masks for Axi Ethernet ability register */ +#define XAE_ABILITY_PFC BIT(16) +#define XAE_ABILITY_FRAME_FILTER BIT(10) +#define XAE_ABILITY_HALF_DUPLEX BIT(9) +#define XAE_ABILITY_STATS BIT(8) +#define XAE_ABILITY_2_5G BIT(3) +#define XAE_ABILITY_1G BIT(2) +#define XAE_ABILITY_100M BIT(1) +#define XAE_ABILITY_10M BIT(0) + /* Bit masks for Axi Ethernet MDIO interface MC register */ #define XAE_MDIO_MC_MDIOEN_MASK 0x00000040 /* MII management enable */ #define XAE_MDIO_MC_CLOCK_DIVIDE_MAX 0x3F /* Maximum MDIO divisor */ @@ -308,7 +323,7 @@ */ #define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF -/* Bit masks for Axi Ethernet FMI register */ +/* Bit masks for Axi Ethernet FMC register */ #define XAE_FMI_PM_MASK 0x80000000 /* Promis. 
mode enable */ #define XAE_FMI_IND_MASK 0x00000003 /* Index Mask */ @@ -326,11 +341,12 @@ #define XAE_MULTICAST_CAM_TABLE_NUM 4 /* Axi Ethernet Synthesis features */ -#define XAE_FEATURE_PARTIAL_RX_CSUM (1 << 0) -#define XAE_FEATURE_PARTIAL_TX_CSUM (1 << 1) -#define XAE_FEATURE_FULL_RX_CSUM (1 << 2) -#define XAE_FEATURE_FULL_TX_CSUM (1 << 3) -#define XAE_FEATURE_DMA_64BIT (1 << 4) +#define XAE_FEATURE_PARTIAL_RX_CSUM BIT(0) +#define XAE_FEATURE_PARTIAL_TX_CSUM BIT(1) +#define XAE_FEATURE_FULL_RX_CSUM BIT(2) +#define XAE_FEATURE_FULL_TX_CSUM BIT(3) +#define XAE_FEATURE_DMA_64BIT BIT(4) +#define XAE_FEATURE_STATS BIT(5) #define XAE_NO_CSUM_OFFLOAD 0 @@ -344,6 +360,61 @@ #define XLNX_MII_STD_SELECT_REG 0x11 #define XLNX_MII_STD_SELECT_SGMII BIT(0) +/* enum temac_stat - TEMAC statistics counters + * + * Index of statistics counters within the TEMAC. This must match the + * order/offset of hardware registers exactly. + */ +enum temac_stat { + STAT_RX_BYTES = 0, + STAT_TX_BYTES, + STAT_UNDERSIZE_FRAMES, + STAT_FRAGMENT_FRAMES, + STAT_RX_64_BYTE_FRAMES, + STAT_RX_65_127_BYTE_FRAMES, + STAT_RX_128_255_BYTE_FRAMES, + STAT_RX_256_511_BYTE_FRAMES, + STAT_RX_512_1023_BYTE_FRAMES, + STAT_RX_1024_MAX_BYTE_FRAMES, + STAT_RX_OVERSIZE_FRAMES, + STAT_TX_64_BYTE_FRAMES, + STAT_TX_65_127_BYTE_FRAMES, + STAT_TX_128_255_BYTE_FRAMES, + STAT_TX_256_511_BYTE_FRAMES, + STAT_TX_512_1023_BYTE_FRAMES, + STAT_TX_1024_MAX_BYTE_FRAMES, + STAT_TX_OVERSIZE_FRAMES, + STAT_RX_GOOD_FRAMES, + STAT_RX_FCS_ERRORS, + STAT_RX_BROADCAST_FRAMES, + STAT_RX_MULTICAST_FRAMES, + STAT_RX_CONTROL_FRAMES, + STAT_RX_LENGTH_ERRORS, + STAT_RX_VLAN_FRAMES, + STAT_RX_PAUSE_FRAMES, + STAT_RX_CONTROL_OPCODE_ERRORS, + STAT_TX_GOOD_FRAMES, + STAT_TX_BROADCAST_FRAMES, + STAT_TX_MULTICAST_FRAMES, + STAT_TX_UNDERRUN_ERRORS, + STAT_TX_CONTROL_FRAMES, + STAT_TX_VLAN_FRAMES, + STAT_TX_PAUSE_FRAMES, + STAT_TX_SINGLE_COLLISION_FRAMES, + STAT_TX_MULTIPLE_COLLISION_FRAMES, + STAT_TX_DEFERRED_FRAMES, + STAT_TX_LATE_COLLISIONS, + STAT_TX_EXCESS_COLLISIONS, + STAT_TX_EXCESS_DEFERRAL, + STAT_RX_ALIGNMENT_ERRORS, + STAT_TX_PFC_FRAMES, + STAT_RX_PFC_FRAMES, + STAT_USER_DEFINED0, + STAT_USER_DEFINED1, + STAT_USER_DEFINED2, + STAT_COUNT, +}; + /** * struct axidma_bd - Axi Dma buffer descriptor layout * @next: MM2S/S2MM Next Descriptor Pointer @@ -434,7 +505,19 @@ struct skbuf_dma_descriptor { * @tx_packets: TX packet count for statistics * @tx_bytes: TX byte count for statistics * @tx_stat_sync: Synchronization object for TX stats + * @hw_stat_base: Base offset for statistics counters. This may be nonzero if + * the statistics counteres were reset or wrapped around. + * @hw_last_counter: Last-seen value of each statistic counter + * @reset_in_progress: Set while we are performing a reset and statistics + * counters may be invalid + * @hw_stats_seqcount: Sequence counter for @hw_stat_base, @hw_last_counter, + * and @reset_in_progress. + * @stats_lock: Lock for @hw_stats_seqcount + * @stats_work: Work for reading the hardware statistics counters often enough + * to catch overflows. * @dma_err_task: Work structure to process Axi DMA errors + * @stopping: Set when @dma_err_task shouldn't do anything because we are + * about to stop the device. * @tx_irq: Axidma TX IRQ number * @rx_irq: Axidma RX IRQ number * @eth_irq: Ethernet core IRQ number @@ -446,8 +529,6 @@ struct skbuf_dma_descriptor { * supported, the maximum frame size would be 9k. Else it is * 1522 bytes (assuming support for basic VLAN) * @rxmem: Stores rx memory size for jumbo frame handling. 
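The new XAE_ABILITY_* bits above let the driver discover synthesized core features at probe time by reading the ability register, rather than inferring them from device-tree properties. The probe-side check itself is not part of these hunks; a minimal sketch of the kind of mapping they enable, with a stand-in struct:

    /* Sketch: map ability-register bits to driver feature flags.
     * Bit values mirror xilinx_axienet.h above; the surrounding probe
     * logic is illustrative only.
     */
    #include <stdint.h>

    #define XAE_ABILITY_STATS (1u << 8)
    #define XAE_FEATURE_STATS (1u << 5)

    struct axienet_local_sketch {
        uint32_t features;
    };

    static void axienet_probe_abilities(struct axienet_local_sketch *lp,
                                        uint32_t ability)
    {
        /* only advertise hardware statistics if the core was built
         * with the statistics counters enabled
         */
        if (ability & XAE_ABILITY_STATS)
            lp->features |= XAE_FEATURE_STATS;
    }

    int main(void)
    {
        struct axienet_local_sketch lp = { 0 };

        axienet_probe_abilities(&lp, XAE_ABILITY_STATS);
        return lp.features == XAE_FEATURE_STATS ? 0 : 1;
    }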
- * @csum_offload_on_tx_path: Stores the checksum selection on TX side. - * @csum_offload_on_rx_path: Stores the checksum selection on RX side. * @coalesce_count_rx: Store the irq coalesce on RX side. * @coalesce_usec_rx: IRQ coalesce delay for RX * @coalesce_count_tx: Store the irq coalesce on TX side. @@ -505,7 +586,15 @@ struct axienet_local { u64_stats_t tx_bytes; struct u64_stats_sync tx_stat_sync; + u64 hw_stat_base[STAT_COUNT]; + u32 hw_last_counter[STAT_COUNT]; + seqcount_mutex_t hw_stats_seqcount; + struct mutex stats_lock; + struct delayed_work stats_work; + bool reset_in_progress; + struct work_struct dma_err_task; + bool stopping; int tx_irq; int rx_irq; @@ -518,9 +607,6 @@ struct axienet_local { u32 max_frm_size; u32 rxmem; - int csum_offload_on_tx_path; - int csum_offload_on_rx_path; - u32 coalesce_count_rx; u32 coalesce_usec_rx; u32 coalesce_count_tx; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index e342f387c3dd..fc35fcb22d94 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -415,6 +415,7 @@ static void axienet_set_mac_address(struct net_device *ndev, static int netdev_set_mac_address(struct net_device *ndev, void *p) { struct sockaddr *addr = p; + axienet_set_mac_address(ndev, addr->sa_data); return 0; } @@ -432,25 +433,31 @@ static int netdev_set_mac_address(struct net_device *ndev, void *p) */ static void axienet_set_multicast_list(struct net_device *ndev) { - int i; + int i = 0; u32 reg, af0reg, af1reg; struct axienet_local *lp = netdev_priv(ndev); - if (ndev->flags & (IFF_ALLMULTI | IFF_PROMISC) || - netdev_mc_count(ndev) > XAE_MULTICAST_CAM_TABLE_NUM) { - /* We must make the kernel realize we had to move into - * promiscuous mode. If it was a promiscuous mode request - * the flag is already set. If not we set it. 
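The rewritten axienet_set_multicast_list() below programs one frame-filter CAM entry at a time: the low byte of the FMI register selects the entry, AF0/AF1 hold the address, the new AM0/AM1 registers hold the compare mask, and FFE enables or disables the entry. A condensed sketch of that per-entry sequence, with regwrite() as a stand-in for axienet_iow():

    /* Per-entry frame-filter programming as done in the hunk below;
     * regwrite() stands in for axienet_iow() and the offsets mirror
     * xilinx_axienet.h.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define XAE_FMI_OFFSET 0x708
    #define XAE_FFE_OFFSET 0x70C
    #define XAE_AF0_OFFSET 0x710
    #define XAE_AF1_OFFSET 0x714
    #define XAE_AM0_OFFSET 0x750
    #define XAE_AM1_OFFSET 0x754

    static void regwrite(uint32_t off, uint32_t val)
    {
        printf("write %#x <- %#x\n", off, val);
    }

    static void set_filter_entry(uint32_t fmi, int index,
                                 const uint8_t *mac, int enable)
    {
        /* select the CAM entry via the low byte of FMI */
        regwrite(XAE_FMI_OFFSET, (fmi & 0xFFFFFF00u) | (uint32_t)index);
        if (enable) {
            regwrite(XAE_AF0_OFFSET, mac[0] | mac[1] << 8 |
                     mac[2] << 16 | (uint32_t)mac[3] << 24);
            regwrite(XAE_AF1_OFFSET, mac[4] | mac[5] << 8);
            /* compare all 48 address bits */
            regwrite(XAE_AM0_OFFSET, 0xffffffffu);
            regwrite(XAE_AM1_OFFSET, 0x0000ffffu);
        }
        regwrite(XAE_FFE_OFFSET, enable ? 1 : 0);
    }

    int main(void)
    {
        const uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

        set_filter_entry(0, 0, mac, 1);
        return 0;
    }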
- */ - ndev->flags |= IFF_PROMISC; - reg = axienet_ior(lp, XAE_FMI_OFFSET); + reg = axienet_ior(lp, XAE_FMI_OFFSET); + reg &= ~XAE_FMI_PM_MASK; + if (ndev->flags & IFF_PROMISC) reg |= XAE_FMI_PM_MASK; + else + reg &= ~XAE_FMI_PM_MASK; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + + if (ndev->flags & IFF_ALLMULTI || + netdev_mc_count(ndev) > XAE_MULTICAST_CAM_TABLE_NUM) { + reg &= 0xFFFFFF00; axienet_iow(lp, XAE_FMI_OFFSET, reg); - dev_info(&ndev->dev, "Promiscuous mode enabled.\n"); + axienet_iow(lp, XAE_AF0_OFFSET, 1); /* Multicast bit */ + axienet_iow(lp, XAE_AF1_OFFSET, 0); + axienet_iow(lp, XAE_AM0_OFFSET, 1); /* ditto */ + axienet_iow(lp, XAE_AM1_OFFSET, 0); + axienet_iow(lp, XAE_FFE_OFFSET, 1); + i = 1; } else if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; - i = 0; netdev_for_each_mc_addr(ha, ndev) { if (i >= XAE_MULTICAST_CAM_TABLE_NUM) break; @@ -463,30 +470,24 @@ static void axienet_set_multicast_list(struct net_device *ndev) af1reg = (ha->addr[4]); af1reg |= (ha->addr[5] << 8); - reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; + reg &= 0xFFFFFF00; reg |= i; axienet_iow(lp, XAE_FMI_OFFSET, reg); axienet_iow(lp, XAE_AF0_OFFSET, af0reg); axienet_iow(lp, XAE_AF1_OFFSET, af1reg); + axienet_iow(lp, XAE_AM0_OFFSET, 0xffffffff); + axienet_iow(lp, XAE_AM1_OFFSET, 0x0000ffff); + axienet_iow(lp, XAE_FFE_OFFSET, 1); i++; } - } else { - reg = axienet_ior(lp, XAE_FMI_OFFSET); - reg &= ~XAE_FMI_PM_MASK; + } + for (; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { + reg &= 0xFFFFFF00; + reg |= i; axienet_iow(lp, XAE_FMI_OFFSET, reg); - - for (i = 0; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { - reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; - reg |= i; - - axienet_iow(lp, XAE_FMI_OFFSET, reg); - axienet_iow(lp, XAE_AF0_OFFSET, 0); - axienet_iow(lp, XAE_AF1_OFFSET, 0); - } - - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); + axienet_iow(lp, XAE_FFE_OFFSET, 0); } } @@ -518,11 +519,55 @@ static void axienet_setoptions(struct net_device *ndev, u32 options) lp->options |= options; } +static u64 axienet_stat(struct axienet_local *lp, enum temac_stat stat) +{ + u32 counter; + + if (lp->reset_in_progress) + return lp->hw_stat_base[stat]; + + counter = axienet_ior(lp, XAE_STATS_OFFSET + stat * 8); + return lp->hw_stat_base[stat] + (counter - lp->hw_last_counter[stat]); +} + +static void axienet_stats_update(struct axienet_local *lp, bool reset) +{ + enum temac_stat stat; + + write_seqcount_begin(&lp->hw_stats_seqcount); + lp->reset_in_progress = reset; + for (stat = 0; stat < STAT_COUNT; stat++) { + u32 counter = axienet_ior(lp, XAE_STATS_OFFSET + stat * 8); + + lp->hw_stat_base[stat] += counter - lp->hw_last_counter[stat]; + lp->hw_last_counter[stat] = counter; + } + write_seqcount_end(&lp->hw_stats_seqcount); +} + +static void axienet_refresh_stats(struct work_struct *work) +{ + struct axienet_local *lp = container_of(work, struct axienet_local, + stats_work.work); + + mutex_lock(&lp->stats_lock); + axienet_stats_update(lp, false); + mutex_unlock(&lp->stats_lock); + + /* Just less than 2^32 bytes at 2.5 GBit/s */ + schedule_delayed_work(&lp->stats_work, 13 * HZ); +} + static int __axienet_device_reset(struct axienet_local *lp) { u32 value; int ret; + /* Save statistics counters in case they will be reset */ + mutex_lock(&lp->stats_lock); + if (lp->features & XAE_FEATURE_STATS) + axienet_stats_update(lp, true); + /* Reset Axi DMA. This would reset Axi Ethernet core as well. 
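The statistics machinery added above widens the 32-bit hardware counters in software: each read returns hw_stat_base plus the unsigned delta since the last poll, and a periodic worker folds that delta into the base before the u32 can wrap (2^32 bytes at 2.5 Gbit/s is about 13.7 s, hence the 13 * HZ refresh interval). A user-space sketch of the same wrap-safe accumulation, without the seqcount plumbing:

    /* Wrap-safe widening of a u32 hardware counter to u64, as in
     * axienet_stat()/axienet_stats_update() above.
     */
    #include <stdint.h>
    #include <stdio.h>

    struct stat64 {
        uint64_t base;
        uint32_t last;
    };

    /* fold the delta since the last poll into the 64-bit base;
     * unsigned subtraction makes a single wrap come out right
     */
    static void stat_update(struct stat64 *s, uint32_t counter)
    {
        s->base += (uint32_t)(counter - s->last);
        s->last = counter;
    }

    static uint64_t stat_read(const struct stat64 *s, uint32_t counter)
    {
        return s->base + (uint32_t)(counter - s->last);
    }

    int main(void)
    {
        struct stat64 s = { 0xFFFFFFF0u, 0xFFFFFFF0u };

        stat_update(&s, 0x10); /* counter wrapped past zero */
        printf("%llu\n", (unsigned long long)stat_read(&s, 0x10));
        return 0;
    }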
The reset * process of Axi DMA takes a while to complete as all pending * commands/transfers will be flushed or completed during this @@ -537,7 +582,7 @@ static int __axienet_device_reset(struct axienet_local *lp) XAXIDMA_TX_CR_OFFSET); if (ret) { dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__); - return ret; + goto out; } /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */ @@ -547,10 +592,29 @@ static int __axienet_device_reset(struct axienet_local *lp) XAE_IS_OFFSET); if (ret) { dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__); - return ret; + goto out; } - return 0; + /* Update statistics counters with new values */ + if (lp->features & XAE_FEATURE_STATS) { + enum temac_stat stat; + + write_seqcount_begin(&lp->hw_stats_seqcount); + lp->reset_in_progress = false; + for (stat = 0; stat < STAT_COUNT; stat++) { + u32 counter = + axienet_ior(lp, XAE_STATS_OFFSET + stat * 8); + + lp->hw_stat_base[stat] += + lp->hw_last_counter[stat] - counter; + lp->hw_last_counter[stat] = counter; + } + write_seqcount_end(&lp->hw_stats_seqcount); + } + +out: + mutex_unlock(&lp->stats_lock); + return ret; } /** @@ -613,8 +677,7 @@ static int axienet_device_reset(struct net_device *ndev) lp->options |= XAE_OPTION_VLAN; lp->options &= (~XAE_OPTION_JUMBO); - if ((ndev->mtu > XAE_MTU) && - (ndev->mtu <= XAE_JUMBO_MTU)) { + if (ndev->mtu > XAE_MTU && ndev->mtu <= XAE_JUMBO_MTU) { lp->max_frm_size = ndev->mtu + VLAN_ETH_HLEN + XAE_TRL_SIZE; @@ -673,15 +736,15 @@ static int axienet_device_reset(struct net_device *ndev) * * Would either be called after a successful transmit operation, or after * there was an error when setting up the chain. - * Returns the number of descriptors handled. + * Returns the number of packets handled. 
*/ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, int nr_bds, bool force, u32 *sizep, int budget) { struct axidma_bd *cur_p; unsigned int status; + int i, packets = 0; dma_addr_t phys; - int i; for (i = 0; i < nr_bds; i++) { cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num]; @@ -700,8 +763,10 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); - if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) + if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) { napi_consume_skb(cur_p->skb, budget); + packets++; + } cur_p->app0 = 0; cur_p->app1 = 0; @@ -717,7 +782,13 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; } - return i; + if (!force) { + lp->tx_bd_ci += i; + if (lp->tx_bd_ci >= lp->tx_bd_num) + lp->tx_bd_ci %= lp->tx_bd_num; + } + + return packets; } /** @@ -890,13 +961,10 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget) u32 size = 0; int packets; - packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, budget, false, &size, budget); + packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, lp->tx_bd_num, false, + &size, budget); if (packets) { - lp->tx_bd_ci += packets; - if (lp->tx_bd_ci >= lp->tx_bd_num) - lp->tx_bd_ci %= lp->tx_bd_num; - u64_stats_update_begin(&lp->tx_stat_sync); u64_stats_add(&lp->tx_packets, packets); u64_stats_add(&lp->tx_bytes, size); @@ -1125,9 +1193,7 @@ static int axienet_rx_poll(struct napi_struct *napi, int budget) csumstatus == XAE_IP_UDP_CSUM_VALIDATED) { skb->ip_summed = CHECKSUM_UNNECESSARY; } - } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && - skb->protocol == htons(ETH_P_IP) && - skb->len > 64) { + } else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) { skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); skb->ip_summed = CHECKSUM_COMPLETE; } @@ -1221,9 +1287,10 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) u32 cr = lp->tx_dma_cr; cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - napi_schedule(&lp->napi_tx); + if (napi_schedule_prep(&lp->napi_tx)) { + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + __napi_schedule(&lp->napi_tx); + } } return IRQ_HANDLED; @@ -1265,9 +1332,10 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) u32 cr = lp->rx_dma_cr; cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - napi_schedule(&lp->napi_rx); + if (napi_schedule_prep(&lp->napi_rx)) { + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + __napi_schedule(&lp->napi_rx); + } } return IRQ_HANDLED; @@ -1296,7 +1364,7 @@ static irqreturn_t axienet_eth_irq(int irq, void *_ndev) ndev->stats.rx_missed_errors++; if (pending & XAE_INT_RXRJECT_MASK) - ndev->stats.rx_frame_errors++; + ndev->stats.rx_dropped++; axienet_iow(lp, XAE_IS_OFFSET, pending); return IRQ_HANDLED; @@ -1459,6 +1527,7 @@ static int axienet_init_legacy_dma(struct net_device *ndev) struct axienet_local *lp = netdev_priv(ndev); /* Enable worker thread for Axi DMA error handling */ + lp->stopping = false; INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler); napi_enable(&lp->napi_rx); @@ -1514,8 +1583,6 @@ static int axienet_open(struct net_device *ndev) int ret; struct axienet_local *lp = netdev_priv(ndev); - dev_dbg(&ndev->dev, "%s\n", __func__); - /* When we do an Axi Ethernet reset, it resets the complete core * including the MDIO. 
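The IRQ-handler change above closes a race: completion interrupts are masked only if napi_schedule_prep() confirms this CPU now owns the NAPI instance, so a poll that has just re-enabled interrupts cannot have them silently masked again by a stale handler invocation. The shape of the pattern, reduced to a stand-alone sketch in which the napi_* stubs and mask_irqs() stand in for the kernel primitives:

    /* Reduced sketch of the "prep, then mask, then schedule" pattern in
     * axienet_tx_irq()/axienet_rx_irq() above.
     */
    #include <stdbool.h>
    #include <stdio.h>

    static bool napi_schedule_prep_stub(void)
    {
        /* returns false if the poll is already scheduled or running */
        return true;
    }

    static void mask_irqs(void)  { printf("irqs masked\n"); }
    static void napi_sched(void) { printf("poll scheduled\n"); }

    static void tx_irq_handler(void)
    {
        if (napi_schedule_prep_stub()) {
            /* mask completion interrupts only once we own the NAPI
             * instance; otherwise a concurrent poll re-enabling them
             * could be undone here
             */
            mask_irqs();
            napi_sched();
        }
    }

    int main(void)
    {
        tx_irq_handler();
        return 0;
    }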
@@ -1514,8 +1583,6 @@ static int axienet_open(struct net_device *ndev)
 	int ret;
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	dev_dbg(&ndev->dev, "%s\n", __func__);
-
 	/* When we do an Axi Ethernet reset, it resets the complete core
 	 * including the MDIO. MDIO must be disabled before resetting.
 	 * Hold MDIO bus lock to avoid MDIO accesses during the reset.
@@ -1532,6 +1599,9 @@ static int axienet_open(struct net_device *ndev)
 
 	phylink_start(lp->phylink);
 
+	/* Start the statistics refresh work */
+	schedule_delayed_work(&lp->stats_work, 0);
+
 	if (lp->use_dmaengine) {
 		/* Enable interrupts for Axi Ethernet core (if defined) */
 		if (lp->eth_irq > 0) {
@@ -1556,6 +1626,7 @@ err_free_eth_irq:
 	if (lp->eth_irq > 0)
 		free_irq(lp->eth_irq, ndev);
 err_phy:
+	cancel_delayed_work_sync(&lp->stats_work);
 	phylink_stop(lp->phylink);
 	phylink_disconnect_phy(lp->phylink);
 	return ret;
@@ -1576,13 +1647,16 @@ static int axienet_stop(struct net_device *ndev)
 	struct axienet_local *lp = netdev_priv(ndev);
 	int i;
 
-	dev_dbg(&ndev->dev, "axienet_close()\n");
-
 	if (!lp->use_dmaengine) {
+		WRITE_ONCE(lp->stopping, true);
+		flush_work(&lp->dma_err_task);
+
 		napi_disable(&lp->napi_tx);
 		napi_disable(&lp->napi_rx);
 	}
 
+	cancel_delayed_work_sync(&lp->stats_work);
+
 	phylink_stop(lp->phylink);
 	phylink_disconnect_phy(lp->phylink);
 
@@ -1657,6 +1731,7 @@ static int axienet_change_mtu(struct net_device *ndev, int new_mtu)
 static void axienet_poll_controller(struct net_device *ndev)
 {
 	struct axienet_local *lp = netdev_priv(ndev);
+
 	disable_irq(lp->tx_irq);
 	disable_irq(lp->rx_irq);
 	axienet_rx_irq(lp->tx_irq, ndev);
@@ -1695,6 +1770,35 @@ axienet_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 		stats->tx_packets = u64_stats_read(&lp->tx_packets);
 		stats->tx_bytes = u64_stats_read(&lp->tx_bytes);
 	} while (u64_stats_fetch_retry(&lp->tx_stat_sync, start));
+
+	if (!(lp->features & XAE_FEATURE_STATS))
+		return;
+
+	do {
+		start = read_seqcount_begin(&lp->hw_stats_seqcount);
+		stats->rx_length_errors =
+			axienet_stat(lp, STAT_RX_LENGTH_ERRORS);
+		stats->rx_crc_errors = axienet_stat(lp, STAT_RX_FCS_ERRORS);
+		stats->rx_frame_errors =
+			axienet_stat(lp, STAT_RX_ALIGNMENT_ERRORS);
+		stats->rx_errors = axienet_stat(lp, STAT_UNDERSIZE_FRAMES) +
+				   axienet_stat(lp, STAT_FRAGMENT_FRAMES) +
+				   stats->rx_length_errors +
+				   stats->rx_crc_errors +
+				   stats->rx_frame_errors;
+		stats->multicast = axienet_stat(lp, STAT_RX_MULTICAST_FRAMES);
+
+		stats->tx_aborted_errors =
+			axienet_stat(lp, STAT_TX_EXCESS_COLLISIONS);
+		stats->tx_fifo_errors =
+			axienet_stat(lp, STAT_TX_UNDERRUN_ERRORS);
+		stats->tx_window_errors =
+			axienet_stat(lp, STAT_TX_LATE_COLLISIONS);
+		stats->tx_errors = axienet_stat(lp, STAT_TX_EXCESS_DEFERRAL) +
+				   stats->tx_aborted_errors +
+				   stats->tx_fifo_errors +
+				   stats->tx_window_errors;
+	} while (read_seqcount_retry(&lp->hw_stats_seqcount, start));
 }
 
 static const struct net_device_ops axienet_netdev_ops = {
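/*
 * Aside (editor's illustration, not part of the patch): the stats_work
 * scheduled in ndo_open() and cancelled in ndo_stop() above is the usual
 * self-rearming delayed work that folds narrow hardware counters into 64-bit
 * accumulators before they can wrap.  Sketch with hypothetical demo_* names;
 * the 13 s period is only an example:
 */
#include <linux/workqueue.h>

struct demo_work_priv {
	struct delayed_work stats_work;
};

static void demo_fold_hw_counters(struct demo_work_priv *priv)
{
	/* hypothetical: read each HW register, accumulate into u64s */
}

static void demo_refresh_stats(struct work_struct *work)
{
	struct demo_work_priv *priv = container_of(work, struct demo_work_priv,
						   stats_work.work);

	demo_fold_hw_counters(priv);
	/* re-arm well inside the wrap period of the narrowest HW counter */
	schedule_delayed_work(&priv->stats_work, 13 * HZ);
}

/* probe: INIT_DEFERRABLE_WORK(&priv->stats_work, demo_refresh_stats);
 * open:  schedule_delayed_work(&priv->stats_work, 0);
 * stop:  cancel_delayed_work_sync(&priv->stats_work);
 */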
@@ -1987,6 +2091,213 @@ static int axienet_ethtools_nway_reset(struct net_device *dev)
 	return phylink_ethtool_nway_reset(lp->phylink);
 }
 
+static void axienet_ethtools_get_ethtool_stats(struct net_device *dev,
+					       struct ethtool_stats *stats,
+					       u64 *data)
+{
+	struct axienet_local *lp = netdev_priv(dev);
+	unsigned int start;
+
+	do {
+		start = read_seqcount_begin(&lp->hw_stats_seqcount);
+		data[0] = axienet_stat(lp, STAT_RX_BYTES);
+		data[1] = axienet_stat(lp, STAT_TX_BYTES);
+		data[2] = axienet_stat(lp, STAT_RX_VLAN_FRAMES);
+		data[3] = axienet_stat(lp, STAT_TX_VLAN_FRAMES);
+		data[4] = axienet_stat(lp, STAT_TX_PFC_FRAMES);
+		data[5] = axienet_stat(lp, STAT_RX_PFC_FRAMES);
+		data[6] = axienet_stat(lp, STAT_USER_DEFINED0);
+		data[7] = axienet_stat(lp, STAT_USER_DEFINED1);
+		data[8] = axienet_stat(lp, STAT_USER_DEFINED2);
+	} while (read_seqcount_retry(&lp->hw_stats_seqcount, start));
+}
+
+static const char axienet_ethtool_stats_strings[][ETH_GSTRING_LEN] = {
+	"Received bytes",
+	"Transmitted bytes",
+	"RX Good VLAN Tagged Frames",
+	"TX Good VLAN Tagged Frames",
+	"TX Good PFC Frames",
+	"RX Good PFC Frames",
+	"User Defined Counter 0",
+	"User Defined Counter 1",
+	"User Defined Counter 2",
+};
+
+static void axienet_ethtools_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(data, axienet_ethtool_stats_strings,
+		       sizeof(axienet_ethtool_stats_strings));
+		break;
+	}
+}
+
+static int axienet_ethtools_get_sset_count(struct net_device *dev, int sset)
+{
+	struct axienet_local *lp = netdev_priv(dev);
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		if (lp->features & XAE_FEATURE_STATS)
+			return ARRAY_SIZE(axienet_ethtool_stats_strings);
+		fallthrough;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void
+axienet_ethtools_get_pause_stats(struct net_device *dev,
+				 struct ethtool_pause_stats *pause_stats)
+{
+	struct axienet_local *lp = netdev_priv(dev);
+	unsigned int start;
+
+	if (!(lp->features & XAE_FEATURE_STATS))
+		return;
+
+	do {
+		start = read_seqcount_begin(&lp->hw_stats_seqcount);
+		pause_stats->tx_pause_frames =
+			axienet_stat(lp, STAT_TX_PAUSE_FRAMES);
+		pause_stats->rx_pause_frames =
+			axienet_stat(lp, STAT_RX_PAUSE_FRAMES);
+	} while (read_seqcount_retry(&lp->hw_stats_seqcount, start));
+}
+
+static void
+axienet_ethtool_get_eth_mac_stats(struct net_device *dev,
+				  struct ethtool_eth_mac_stats *mac_stats)
+{
+	struct axienet_local *lp = netdev_priv(dev);
+	unsigned int start;
+
+	if (!(lp->features & XAE_FEATURE_STATS))
+		return;
+
+	do {
+		start = read_seqcount_begin(&lp->hw_stats_seqcount);
+		mac_stats->FramesTransmittedOK =
+			axienet_stat(lp, STAT_TX_GOOD_FRAMES);
+		mac_stats->SingleCollisionFrames =
+			axienet_stat(lp, STAT_TX_SINGLE_COLLISION_FRAMES);
+		mac_stats->MultipleCollisionFrames =
+			axienet_stat(lp, STAT_TX_MULTIPLE_COLLISION_FRAMES);
+		mac_stats->FramesReceivedOK =
+			axienet_stat(lp, STAT_RX_GOOD_FRAMES);
+		mac_stats->FrameCheckSequenceErrors =
+			axienet_stat(lp, STAT_RX_FCS_ERRORS);
+		mac_stats->AlignmentErrors =
+			axienet_stat(lp, STAT_RX_ALIGNMENT_ERRORS);
+		mac_stats->FramesWithDeferredXmissions =
+			axienet_stat(lp, STAT_TX_DEFERRED_FRAMES);
+		mac_stats->LateCollisions =
+			axienet_stat(lp, STAT_TX_LATE_COLLISIONS);
+		mac_stats->FramesAbortedDueToXSColls =
+			axienet_stat(lp, STAT_TX_EXCESS_COLLISIONS);
+		mac_stats->MulticastFramesXmittedOK =
+			axienet_stat(lp, STAT_TX_MULTICAST_FRAMES);
+		mac_stats->BroadcastFramesXmittedOK =
+			axienet_stat(lp, STAT_TX_BROADCAST_FRAMES);
+		mac_stats->FramesWithExcessiveDeferral =
+			axienet_stat(lp, STAT_TX_EXCESS_DEFERRAL);
+		mac_stats->MulticastFramesReceivedOK =
+			axienet_stat(lp, STAT_RX_MULTICAST_FRAMES);
+		mac_stats->BroadcastFramesReceivedOK =
+			axienet_stat(lp, STAT_RX_BROADCAST_FRAMES);
+		mac_stats->InRangeLengthErrors =
+			axienet_stat(lp, STAT_RX_LENGTH_ERRORS);
+	} while (read_seqcount_retry(&lp->hw_stats_seqcount, start));
+}
+
+static void
+axienet_ethtool_get_eth_ctrl_stats(struct net_device *dev,
+				   struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+	struct axienet_local *lp = netdev_priv(dev);
+	unsigned int start;
+
+	if (!(lp->features & XAE_FEATURE_STATS))
+		return;
+
+	do {
+		start = read_seqcount_begin(&lp->hw_stats_seqcount);
+		ctrl_stats->MACControlFramesTransmitted =
+			axienet_stat(lp, STAT_TX_CONTROL_FRAMES);
+		ctrl_stats->MACControlFramesReceived =
+			axienet_stat(lp, STAT_RX_CONTROL_FRAMES);
+		ctrl_stats->UnsupportedOpcodesReceived =
+			axienet_stat(lp, STAT_RX_CONTROL_OPCODE_ERRORS);
+	} while (read_seqcount_retry(&lp->hw_stats_seqcount, start));
+}
+
+static const struct ethtool_rmon_hist_range axienet_rmon_ranges[] = {
+	{   64,    64 },
+	{   65,   127 },
+	{  128,   255 },
+	{  256,   511 },
+	{  512,  1023 },
+	{ 1024,  1518 },
+	{ 1519, 16384 },
+	{ },
+};
+
+static void
+axienet_ethtool_get_rmon_stats(struct net_device *dev,
+			       struct ethtool_rmon_stats *rmon_stats,
+			       const struct ethtool_rmon_hist_range **ranges)
+{
+	struct axienet_local *lp = netdev_priv(dev);
+	unsigned int start;
+
+	if (!(lp->features & XAE_FEATURE_STATS))
+		return;
+
+	do {
+		start = read_seqcount_begin(&lp->hw_stats_seqcount);
+		rmon_stats->undersize_pkts =
+			axienet_stat(lp, STAT_UNDERSIZE_FRAMES);
+		rmon_stats->oversize_pkts =
+			axienet_stat(lp, STAT_RX_OVERSIZE_FRAMES);
+		rmon_stats->fragments =
+			axienet_stat(lp, STAT_FRAGMENT_FRAMES);
+
+		rmon_stats->hist[0] =
+			axienet_stat(lp, STAT_RX_64_BYTE_FRAMES);
+		rmon_stats->hist[1] =
+			axienet_stat(lp, STAT_RX_65_127_BYTE_FRAMES);
+		rmon_stats->hist[2] =
+			axienet_stat(lp, STAT_RX_128_255_BYTE_FRAMES);
+		rmon_stats->hist[3] =
+			axienet_stat(lp, STAT_RX_256_511_BYTE_FRAMES);
+		rmon_stats->hist[4] =
+			axienet_stat(lp, STAT_RX_512_1023_BYTE_FRAMES);
+		rmon_stats->hist[5] =
+			axienet_stat(lp, STAT_RX_1024_MAX_BYTE_FRAMES);
+		rmon_stats->hist[6] =
+			rmon_stats->oversize_pkts;
+
+		rmon_stats->hist_tx[0] =
+			axienet_stat(lp, STAT_TX_64_BYTE_FRAMES);
+		rmon_stats->hist_tx[1] =
+			axienet_stat(lp, STAT_TX_65_127_BYTE_FRAMES);
+		rmon_stats->hist_tx[2] =
+			axienet_stat(lp, STAT_TX_128_255_BYTE_FRAMES);
+		rmon_stats->hist_tx[3] =
+			axienet_stat(lp, STAT_TX_256_511_BYTE_FRAMES);
+		rmon_stats->hist_tx[4] =
+			axienet_stat(lp, STAT_TX_512_1023_BYTE_FRAMES);
+		rmon_stats->hist_tx[5] =
+			axienet_stat(lp, STAT_TX_1024_MAX_BYTE_FRAMES);
+		rmon_stats->hist_tx[6] =
+			axienet_stat(lp, STAT_TX_OVERSIZE_FRAMES);
+	} while (read_seqcount_retry(&lp->hw_stats_seqcount, start));
+
+	*ranges = axienet_rmon_ranges;
+}
+
 static const struct ethtool_ops axienet_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
 				     ETHTOOL_COALESCE_USECS,
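/*
 * Aside (editor's illustration, not part of the patch): get_sset_count(),
 * get_strings() and get_ethtool_stats() form one contract -- the ethtool
 * core allocates exactly get_sset_count() u64 slots, and data[i] must line
 * up with string i (which is why the indices above must stay contiguous
 * with the strings array).  Minimal sketch, hypothetical demo_* names:
 */
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/string.h>

static const char demo_stat_strings[][ETH_GSTRING_LEN] = {
	"demo_rx_frames",
	"demo_tx_frames",
};

static u64 demo_read_hw_counter(struct net_device *dev, int idx)
{
	return 0;	/* hypothetical register read */
}

static int demo_get_sset_count(struct net_device *dev, int sset)
{
	return sset == ETH_SS_STATS ? ARRAY_SIZE(demo_stat_strings)
				    : -EOPNOTSUPP;
}

static void demo_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
	if (sset == ETH_SS_STATS)
		memcpy(data, demo_stat_strings, sizeof(demo_stat_strings));
}

static void demo_get_ethtool_stats(struct net_device *dev,
				   struct ethtool_stats *stats, u64 *data)
{
	/* exactly ARRAY_SIZE(demo_stat_strings) slots, in string order */
	data[0] = demo_read_hw_counter(dev, 0);
	data[1] = demo_read_hw_counter(dev, 1);
}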
@@ -2003,6 +2314,13 @@ static const struct ethtool_ops axienet_ethtool_ops = {
 	.get_link_ksettings = axienet_ethtools_get_link_ksettings,
 	.set_link_ksettings = axienet_ethtools_set_link_ksettings,
 	.nway_reset = axienet_ethtools_nway_reset,
+	.get_ethtool_stats = axienet_ethtools_get_ethtool_stats,
+	.get_strings = axienet_ethtools_get_strings,
+	.get_sset_count = axienet_ethtools_get_sset_count,
+	.get_pause_stats = axienet_ethtools_get_pause_stats,
+	.get_eth_mac_stats = axienet_ethtool_get_eth_mac_stats,
+	.get_eth_ctrl_stats = axienet_ethtool_get_eth_ctrl_stats,
+	.get_rmon_stats = axienet_ethtool_get_rmon_stats,
 };
 
 static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs)
@@ -2153,6 +2471,10 @@ static void axienet_dma_err_handler(struct work_struct *work)
 					dma_err_task);
 	struct net_device *ndev = lp->ndev;
 
+	/* Don't bother if we are going to stop anyway */
+	if (READ_ONCE(lp->stopping))
+		return;
+
 	napi_disable(&lp->napi_tx);
 	napi_disable(&lp->napi_rx);
 
@@ -2219,9 +2541,9 @@ static void axienet_dma_err_handler(struct work_struct *work)
 			    ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
 	axienet_set_mac_address(ndev, NULL);
 	axienet_set_multicast_list(ndev);
-	axienet_setoptions(ndev, lp->options);
 	napi_enable(&lp->napi_rx);
 	napi_enable(&lp->napi_tx);
+	axienet_setoptions(ndev, lp->options);
 }
 
 /**
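/*
 * Aside (editor's illustration, not part of the patch): the lp->stopping
 * flag and flush_work() above form a plain shutdown handshake between
 * ndo_stop() and a recovery worker, so a late-queued error handler cannot
 * re-enable hardware that is being torn down.  Reduced sketch, hypothetical
 * demo_* names:
 */
#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct demo_stop_priv {
	struct work_struct err_task;
	bool stopping;
};

static void demo_err_handler(struct work_struct *work)
{
	struct demo_stop_priv *priv = container_of(work, struct demo_stop_priv,
						   err_task);

	if (READ_ONCE(priv->stopping))	/* stop already in progress: bail */
		return;

	/* ... reset and re-arm the hardware ... */
}

static int demo_stop(struct net_device *ndev)
{
	struct demo_stop_priv *priv = netdev_priv(ndev);

	WRITE_ONCE(priv->stopping, true);
	flush_work(&priv->err_task);	/* wait out a handler already running */

	/* now safe to disable NAPI and free the rings */
	return 0;
}

/* the open path must clear priv->stopping before the work can be queued again */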
@@ -2271,6 +2593,10 @@ static int axienet_probe(struct platform_device *pdev)
 	u64_stats_init(&lp->rx_stat_sync);
 	u64_stats_init(&lp->tx_stat_sync);
 
+	mutex_init(&lp->stats_lock);
+	seqcount_mutex_init(&lp->hw_stats_seqcount, &lp->stats_lock);
+	INIT_DEFERRABLE_WORK(&lp->stats_work, axienet_refresh_stats);
+
 	lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk");
 	if (!lp->axi_clk) {
 		/* For backward compatibility, if named AXI clock is not present,
@@ -2311,42 +2637,35 @@ static int axienet_probe(struct platform_device *pdev)
 	/* Setup checksum offload, but default to off if not specified */
 	lp->features = 0;
 
+	if (axienet_ior(lp, XAE_ABILITY_OFFSET) & XAE_ABILITY_STATS)
+		lp->features |= XAE_FEATURE_STATS;
+
 	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,txcsum", &value);
 	if (!ret) {
 		switch (value) {
 		case 1:
-			lp->csum_offload_on_tx_path =
-				XAE_FEATURE_PARTIAL_TX_CSUM;
 			lp->features |= XAE_FEATURE_PARTIAL_TX_CSUM;
-			/* Can checksum TCP/UDP over IPv4. */
-			ndev->features |= NETIF_F_IP_CSUM;
+			/* Can checksum any contiguous range */
+			ndev->features |= NETIF_F_HW_CSUM;
 			break;
 		case 2:
-			lp->csum_offload_on_tx_path =
-				XAE_FEATURE_FULL_TX_CSUM;
 			lp->features |= XAE_FEATURE_FULL_TX_CSUM;
 			/* Can checksum TCP/UDP over IPv4. */
 			ndev->features |= NETIF_F_IP_CSUM;
 			break;
-		default:
-			lp->csum_offload_on_tx_path = XAE_NO_CSUM_OFFLOAD;
 		}
 	}
 	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,rxcsum", &value);
 	if (!ret) {
 		switch (value) {
 		case 1:
-			lp->csum_offload_on_rx_path =
-				XAE_FEATURE_PARTIAL_RX_CSUM;
 			lp->features |= XAE_FEATURE_PARTIAL_RX_CSUM;
+			ndev->features |= NETIF_F_RXCSUM;
 			break;
 		case 2:
-			lp->csum_offload_on_rx_path =
-				XAE_FEATURE_FULL_RX_CSUM;
 			lp->features |= XAE_FEATURE_FULL_RX_CSUM;
+			ndev->features |= NETIF_F_RXCSUM;
 			break;
-		default:
-			lp->csum_offload_on_rx_path = XAE_NO_CSUM_OFFLOAD;
 		}
 	}
 	/* For supporting jumbo frames, the Axi Ethernet hardware must have
@@ -2396,7 +2715,7 @@ static int axienet_probe(struct platform_device *pdev)
 		goto cleanup_clk;
 	}
 
-	if (!of_find_property(pdev->dev.of_node, "dmas", NULL)) {
+	if (!of_property_present(pdev->dev.of_node, "dmas")) {
 		/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
 		np = of_parse_phandle(pdev->dev.of_node, "axistream-connected",
 				      0);
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 56df37f8d50a..aef316278eb4 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1026,9 +1026,7 @@ static int ixp4xx_get_ts_info(struct net_device *dev,
 	if (info->phc_index < 0) {
 		info->so_timestamping =
-			SOF_TIMESTAMPING_TX_SOFTWARE |
-			SOF_TIMESTAMPING_RX_SOFTWARE |
-			SOF_TIMESTAMPING_SOFTWARE;
+			SOF_TIMESTAMPING_TX_SOFTWARE;
 		return 0;
 	}
 	info->so_timestamping =
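/*
 * Aside (editor's illustration, not part of the patch): the "dmas" check
 * above uses of_property_present(), the modern boolean existence test that
 * replaces the old !!of_find_property(np, name, NULL) idiom.  Sketch with
 * hypothetical demo_* helpers:
 */
#include <linux/of.h>
#include <linux/platform_device.h>

static int demo_setup_dmaengine(struct platform_device *pdev)
{
	return 0;	/* hypothetical: bind via the dmaengine framework */
}

static int demo_setup_legacy_dma(struct platform_device *pdev)
{
	return 0;	/* hypothetical: map DMA registers directly */
}

static int demo_pick_dma_backend(struct platform_device *pdev)
{
	/* returns a plain bool; no length out-parameter needed */
	if (of_property_present(pdev->dev.of_node, "dmas"))
		return demo_setup_dmaengine(pdev);

	return demo_setup_legacy_dma(pdev);
}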