Diffstat (limited to 'drivers/net/ethernet')
513 files changed, 42237 insertions, 11235 deletions
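Two of the larger ENA changes in this diff are easier to follow with a sketch in hand. The first is the LLQ tx-burst accounting added in ena_eth_com.c/ena_eth_com.h below: each packet occupies one or more LLQ entries, a per-queue entry budget is decremented as entries are written to the device, and the doorbell is written early (which resets the budget) when the next packet would not fit in the remaining burst. A minimal stand-alone sketch with hypothetical types and names (in the driver this state lives in struct ena_com_io_sq and struct ena_com_llq_info; the meta-descriptor bookkeeping is omitted):

#include <stdbool.h>

struct llq_sq {
	unsigned int descs_num_before_header;  /* descs carried by the first entry */
	unsigned int descs_per_entry;          /* descs per subsequent entry */
	unsigned int max_entries_in_tx_burst;  /* device-reported burst limit */
	unsigned int entries_in_tx_burst_left; /* budget left in current burst */
};

/* LLQ entries a packet with num_descs descriptors will consume. */
static unsigned int entries_needed(const struct llq_sq *sq, unsigned int num_descs)
{
	unsigned int n = 1;

	if (num_descs > sq->descs_num_before_header) {
		unsigned int rest = num_descs - sq->descs_num_before_header;

		n += (rest + sq->descs_per_entry - 1) / sq->descs_per_entry; /* DIV_ROUND_UP */
	}
	return n;
}

/* True when the doorbell must be rung before queueing this packet. */
static bool doorbell_needed(const struct llq_sq *sq, unsigned int num_descs)
{
	return entries_needed(sq, num_descs) > sq->entries_in_tx_burst_left;
}

/* Ringing the doorbell hands the burst to the device and resets the budget. */
static void ring_doorbell(struct llq_sq *sq)
{
	/* in the driver: writel(tail, io_sq->db_addr); */
	sq->entries_in_tx_burst_left = sq->max_entries_in_tx_burst;
}

This mirrors ena_com_is_doorbell_needed() and ena_com_write_sq_doorbell() in the hunks below; ena_start_xmit() runs the check before preparing each packet, so a full burst is flushed to the device as soon as the next packet would overflow it.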
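The second is the queue-creation backoff added in ena_netdev.c (create_queues_with_size_backoff()): when queue allocation fails with -ENOMEM, the driver halves the larger of the TX and RX ring sizes (both when they are equal) and retries, giving up once either ring would drop below ENA_MIN_RING_SIZE. A sketch under the same caveats; try_alloc_queues() is a made-up stand-in for the four setup/create steps in the patch:

#include <stdbool.h>

#define MIN_RING_SIZE 256 /* ENA_MIN_RING_SIZE in the patch */

/* Pretend allocator for the example: fails while the rings are too big. */
static bool try_alloc_queues(unsigned int tx, unsigned int rx)
{
	return tx + rx <= 1536;
}

static int create_queues_with_backoff(unsigned int tx, unsigned int rx)
{
	while (!try_alloc_queues(tx, rx)) {
		unsigned int cur_tx = tx, cur_rx = rx;

		/* Halve the larger ring, or both when they are the same size. */
		if (cur_rx <= cur_tx)
			tx = cur_tx / 2;
		if (cur_rx >= cur_tx)
			rx = cur_rx / 2;

		if (tx < MIN_RING_SIZE || rx < MIN_RING_SIZE)
			return -1; /* don't retry below the minimum ring size */
	}
	return 0; /* rings of size tx/rx were created */
}

Starting from the 1024-entry defaults this tries 1024/1024, then 512/512, and so on, never letting a ring drop below 256 entries, which matches the error path in the hunk.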
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index fe115b7caba0..93a2d4deb27c 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -76,6 +76,7 @@ source "drivers/net/ethernet/ezchip/Kconfig" source "drivers/net/ethernet/faraday/Kconfig" source "drivers/net/ethernet/freescale/Kconfig" source "drivers/net/ethernet/fujitsu/Kconfig" +source "drivers/net/ethernet/google/Kconfig" source "drivers/net/ethernet/hisilicon/Kconfig" source "drivers/net/ethernet/hp/Kconfig" source "drivers/net/ethernet/huawei/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 7b5bf9682066..fb9155cffcff 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_NET_VENDOR_EZCHIP) += ezchip/ obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/ obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/ obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/ +obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/ obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/ obj-$(CONFIG_NET_VENDOR_HP) += hp/ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/ diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 9e06dff619c3..3434730a7699 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -224,8 +224,8 @@ static int emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) static void emac_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRV_NAME, sizeof(DRV_NAME)); - strlcpy(info->version, DRV_VERSION, sizeof(DRV_VERSION)); + strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_VERSION, sizeof(info->version)); strlcpy(info->bus_info, dev_name(&dev->dev), sizeof(info->bus_info)); } @@ -818,7 +818,6 @@ static int emac_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, &pdev->dev); db = netdev_priv(ndev); - memset(db, 0, sizeof(*db)); db->dev = &pdev->dev; db->ndev = ndev; diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 9f80b73f90b1..d19f2ecf8e84 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -60,6 +60,7 @@ enum ena_admin_aq_feature_id { ENA_ADMIN_MAX_QUEUES_NUM = 2, ENA_ADMIN_HW_HINTS = 3, ENA_ADMIN_LLQ = 4, + ENA_ADMIN_MAX_QUEUES_EXT = 7, ENA_ADMIN_RSS_HASH_FUNCTION = 10, ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, @@ -421,7 +422,13 @@ struct ena_admin_get_set_feature_common_desc { /* as appears in ena_admin_aq_feature_id */ u8 feature_id; - u16 reserved16; + /* The driver specifies the max feature version it supports and the + * device responds with the currently supported feature version. The + * field is zero based + */ + u8 feature_version; + + u8 reserved8; }; struct ena_admin_device_attr_feature_desc { @@ -524,6 +531,39 @@ struct ena_admin_feature_llq_desc { /* the stride control the driver selected to use */ u16 descriptors_stride_ctrl_enabled; + + /* Maximum size in bytes taken by llq entries in a single tx burst. + * Set to 0 when there is no such limit. 
+ */ + u32 max_tx_burst_size; +}; + +struct ena_admin_queue_ext_feature_fields { + u32 max_tx_sq_num; + + u32 max_tx_cq_num; + + u32 max_rx_sq_num; + + u32 max_rx_cq_num; + + u32 max_tx_sq_depth; + + u32 max_tx_cq_depth; + + u32 max_rx_sq_depth; + + u32 max_rx_cq_depth; + + u32 max_tx_header_size; + + /* Maximum Descriptors number, including meta descriptor, allowed for + * a single Tx packet + */ + u16 max_per_packet_tx_descs; + + /* Maximum Descriptors number allowed for a single Rx packet */ + u16 max_per_packet_rx_descs; }; struct ena_admin_queue_feature_desc { @@ -832,6 +872,19 @@ struct ena_admin_get_feat_cmd { u32 raw[11]; }; +struct ena_admin_queue_ext_feature_desc { + /* version */ + u8 version; + + u8 reserved1[3]; + + union { + struct ena_admin_queue_ext_feature_fields max_queue_ext; + + u32 raw[10]; + }; +}; + struct ena_admin_get_feat_resp { struct ena_admin_acq_common_desc acq_common_desc; @@ -844,6 +897,8 @@ struct ena_admin_get_feat_resp { struct ena_admin_queue_feature_desc max_queue; + struct ena_admin_queue_ext_feature_desc max_queue_ext; + struct ena_admin_feature_aenq_desc aenq; struct ena_admin_get_feature_link_desc link; @@ -908,7 +963,9 @@ struct ena_admin_aenq_common_desc { u16 syndrom; - /* 0 : phase */ + /* 0 : phase + * 7:1 : reserved - MBZ + */ u8 flags; u8 reserved1[3]; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 7f8266b191ae..911a2e7a375a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -91,7 +91,7 @@ struct ena_com_stats_ctx { struct ena_admin_acq_get_stats_resp get_resp; }; -static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, +static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, struct ena_common_mem_addr *ena_addr, dma_addr_t addr) { @@ -115,7 +115,7 @@ static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) GFP_KERNEL); if (!sq->entries) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -137,7 +137,7 @@ static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) GFP_KERNEL); if (!cq->entries) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -160,7 +160,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *dev, GFP_KERNEL); if (!aenq->entries) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -190,7 +190,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *dev, return 0; } -static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, +static void comp_ctxt_release(struct ena_com_admin_queue *queue, struct ena_comp_ctx *comp_ctx) { comp_ctx->occupied = false; @@ -277,7 +277,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu return comp_ctx; } -static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) +static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) { size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); struct ena_comp_ctx *comp_ctx; @@ -285,7 +285,7 @@ static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); if (unlikely(!queue->comp_ctx)) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -356,7 +356,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, } if (!io_sq->desc_addr.virt_addr) { - 
pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } } @@ -382,7 +382,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); if (!io_sq->bounce_buf_ctrl.base_buffer) { - pr_err("bounce buffer memory allocation failed"); + pr_err("bounce buffer memory allocation failed\n"); return -ENOMEM; } @@ -396,6 +396,10 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, 0x0, io_sq->llq_info.desc_list_entry_size); io_sq->llq_buf_ctrl.descs_left_in_line = io_sq->llq_info.descs_num_before_header; + + if (io_sq->llq_info.max_entries_in_tx_burst > 0) + io_sq->entries_in_tx_burst_left = + io_sq->llq_info.max_entries_in_tx_burst; } io_sq->tail = 0; @@ -436,7 +440,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, } if (!io_cq->cdesc_addr.virt_addr) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -727,6 +731,9 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, supported_feat, llq_info->descs_num_before_header); } + llq_info->max_entries_in_tx_burst = + (u16)(llq_features->max_tx_burst_size / llq_default_cfg->llq_ring_entry_size_value); + rc = ena_com_set_llq(ena_dev); if (rc) pr_err("Cannot set LLQ configuration: %d\n", rc); @@ -755,16 +762,26 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com admin_queue->stats.no_completion++; spin_unlock_irqrestore(&admin_queue->q_lock, flags); - if (comp_ctx->status == ENA_CMD_COMPLETED) - pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n", - comp_ctx->cmd_opcode); - else - pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n", + if (comp_ctx->status == ENA_CMD_COMPLETED) { + pr_err("The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n", + comp_ctx->cmd_opcode, + admin_queue->auto_polling ? "ON" : "OFF"); + /* Check if fallback to polling is enabled */ + if (admin_queue->auto_polling) + admin_queue->polling = true; + } else { + pr_err("The ena device doesn't send a completion for the admin cmd %d status %d\n", comp_ctx->cmd_opcode, comp_ctx->status); - - admin_queue->running_state = false; - ret = -ETIME; - goto err; + } + /* Check if shifted to polling mode. + * This will happen if there is a completion without an interrupt + * and autopolling mode is enabled. 
Continuing normal execution in such case + */ + if (!admin_queue->polling) { + admin_queue->running_state = false; + ret = -ETIME; + goto err; + } } ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); @@ -822,7 +839,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) } if (read_resp->reg_off != offset) { - pr_err("Read failure: wrong offset provided"); + pr_err("Read failure: wrong offset provided\n"); ret = ENA_MMIO_READ_TIMEOUT; } else { ret = read_resp->reg_val; @@ -961,7 +978,8 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp *get_resp, enum ena_admin_aq_feature_id feature_id, dma_addr_t control_buf_dma_addr, - u32 control_buff_size) + u32 control_buff_size, + u8 feature_ver) { struct ena_com_admin_queue *admin_queue; struct ena_admin_get_feat_cmd get_cmd; @@ -992,7 +1010,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, } get_cmd.control_buffer.length = control_buff_size; - + get_cmd.feat_common.feature_version = feature_ver; get_cmd.feat_common.feature_id = feature_id; ret = ena_com_execute_admin_command(admin_queue, @@ -1012,13 +1030,15 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, static int ena_com_get_feature(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp *get_resp, - enum ena_admin_aq_feature_id feature_id) + enum ena_admin_aq_feature_id feature_id, + u8 feature_ver) { return ena_com_get_feature_ex(ena_dev, get_resp, feature_id, 0, - 0); + 0, + feature_ver); } static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) @@ -1078,7 +1098,7 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, int ret; ret = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0); if (unlikely(ret)) return ret; @@ -1498,7 +1518,7 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) struct ena_admin_get_feat_resp get_resp; int ret; - ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG); + ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG, 0); if (ret) { pr_info("Can't get aenq configuration\n"); return ret; @@ -1643,6 +1663,12 @@ void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) ena_dev->admin_queue.polling = polling; } +void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev, + bool polling) +{ + ena_dev->admin_queue.auto_polling = polling; +} + int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) { struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; @@ -1867,7 +1893,7 @@ void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) int ena_com_get_link_params(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp *resp) { - return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG); + return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG, 0); } int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, @@ -1877,7 +1903,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, int rc; rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_DEVICE_ATTRIBUTES); + ENA_ADMIN_DEVICE_ATTRIBUTES, 0); if (rc) return rc; @@ -1885,17 +1911,34 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, sizeof(get_resp.u.dev_attr)); ena_dev->supported_features = get_resp.u.dev_attr.supported_features; - rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_MAX_QUEUES_NUM); - if (rc) - return rc; + if 
(ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_EXT, + ENA_FEATURE_MAX_QUEUE_EXT_VER); + if (rc) + return rc; - memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, - sizeof(get_resp.u.max_queue)); - ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; + if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER) + return -EINVAL; + + memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext, + sizeof(get_resp.u.max_queue_ext)); + ena_dev->tx_max_header_size = + get_resp.u.max_queue_ext.max_queue_ext.max_tx_header_size; + } else { + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_NUM, 0); + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, + sizeof(get_resp.u.max_queue)); + ena_dev->tx_max_header_size = + get_resp.u.max_queue.max_header_size; + + if (rc) + return rc; + } rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_AENQ_CONFIG); + ENA_ADMIN_AENQ_CONFIG, 0); if (rc) return rc; @@ -1903,7 +1946,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, sizeof(get_resp.u.aenq)); rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); if (rc) return rc; @@ -1913,7 +1956,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, /* Driver hints isn't mandatory admin command. So in case the * command isn't supported set driver hints to 0 */ - rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS); + rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0); if (!rc) memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, @@ -1924,7 +1967,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, else return rc; - rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ); + rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0); if (!rc) memcpy(&get_feat_ctx->llq, &get_resp.u.llq, sizeof(get_resp.u.llq)); @@ -2161,7 +2204,7 @@ int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp resp; ret = ena_com_get_feature(ena_dev, &resp, - ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); if (unlikely(ret)) { pr_err("Failed to get offload capabilities %d\n", ret); return ret; @@ -2190,7 +2233,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) /* Validate hash function is supported */ ret = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_RSS_HASH_FUNCTION); + ENA_ADMIN_RSS_HASH_FUNCTION, 0); if (unlikely(ret)) return ret; @@ -2250,7 +2293,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, rc = ena_com_get_feature_ex(ena_dev, &get_resp, ENA_ADMIN_RSS_HASH_FUNCTION, rss->hash_key_dma_addr, - sizeof(*rss->hash_key)); + sizeof(*rss->hash_key), 0); if (unlikely(rc)) return rc; @@ -2302,7 +2345,7 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, rc = ena_com_get_feature_ex(ena_dev, &get_resp, ENA_ADMIN_RSS_HASH_FUNCTION, rss->hash_key_dma_addr, - sizeof(*rss->hash_key)); + sizeof(*rss->hash_key), 0); if (unlikely(rc)) return rc; @@ -2327,7 +2370,7 @@ int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, rc = ena_com_get_feature_ex(ena_dev, &get_resp, ENA_ADMIN_RSS_HASH_INPUT, rss->hash_ctrl_dma_addr, - sizeof(*rss->hash_ctrl)); + sizeof(*rss->hash_ctrl), 0); if (unlikely(rc)) return rc; @@ -2563,7 +2606,7 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) rc = ena_com_get_feature_ex(ena_dev, &get_resp, 
ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, rss->rss_ind_tbl_dma_addr, - tbl_size); + tbl_size, 0); if (unlikely(rc)) return rc; @@ -2778,7 +2821,7 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) int rc; rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_INTERRUPT_MODERATION); + ENA_ADMIN_INTERRUPT_MODERATION, 0); if (rc) { if (rc == -EOPNOTSUPP) { @@ -2913,8 +2956,8 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, struct ena_admin_feature_llq_desc *llq_features, struct ena_llq_configurations *llq_default_cfg) { + struct ena_com_llq_info *llq_info = &ena_dev->llq_info; int rc; - int size; if (!llq_features->max_llq_num) { ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; @@ -2925,12 +2968,10 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, if (rc) return rc; - /* Validate the descriptor is not too big */ - size = ena_dev->tx_max_header_size; - size += ena_dev->llq_info.descs_num_before_header * - sizeof(struct ena_eth_io_tx_desc); + ena_dev->tx_max_header_size = llq_info->desc_list_entry_size - + (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc)); - if (unlikely(ena_dev->llq_info.desc_list_entry_size < size)) { + if (unlikely(ena_dev->tx_max_header_size == 0)) { pr_err("the size of the LLQ entry is smaller than needed\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 078d6f2b4f39..0d3664fe260d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -101,6 +101,8 @@ #define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF +#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1 + enum ena_intr_moder_level { ENA_INTR_MODER_LOWEST = 0, ENA_INTR_MODER_LOW, @@ -159,6 +161,7 @@ struct ena_com_llq_info { u16 desc_list_entry_size; u16 descs_num_before_header; u16 descs_per_entry; + u16 max_entries_in_tx_burst; }; struct ena_com_io_cq { @@ -238,6 +241,7 @@ struct ena_com_io_sq { u8 phase; u8 desc_entry_size; u8 dma_addr_bits; + u16 entries_in_tx_burst_left; } ____cacheline_aligned; struct ena_com_admin_cq { @@ -281,6 +285,9 @@ struct ena_com_admin_queue { /* Indicate if the admin queue should poll for completion */ bool polling; + /* Define if fallback to polling mode should occur */ + bool auto_polling; + u16 curr_cmd_id; /* Indicate that the ena was initialized and can @@ -377,6 +384,7 @@ struct ena_com_dev { struct ena_com_dev_get_features_ctx { struct ena_admin_queue_feature_desc max_queues; + struct ena_admin_queue_ext_feature_desc max_queue_ext; struct ena_admin_device_attr_feature_desc dev_attr; struct ena_admin_feature_aenq_desc aenq; struct ena_admin_feature_offload_desc offload; @@ -536,6 +544,17 @@ void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling); */ bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); +/* ena_com_set_admin_auto_polling_mode - Enable autoswitch to polling mode + * @ena_dev: ENA communication layer struct + * @polling: Enable/Disable polling mode + * + * Set the autopolling mode. + * If autopolling is on: + * In case of missing interrupt when data is available switch to polling. 
+ */ +void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev, + bool polling); + /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler * @ena_dev: ENA communication layer struct * diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index f6c2d3855be8..38046bf0ff44 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -32,7 +32,7 @@ #include "ena_eth_com.h" -static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( +static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( struct ena_com_io_cq *io_cq) { struct ena_eth_io_rx_cdesc_base *cdesc; @@ -59,7 +59,7 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( return cdesc; } -static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) +static void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) { u16 tail_masked; u32 offset; @@ -71,7 +71,7 @@ static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); } -static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, +static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, u8 *bounce_buffer) { struct ena_com_llq_info *llq_info = &io_sq->llq_info; @@ -82,6 +82,17 @@ static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1); dst_offset = dst_tail_mask * llq_info->desc_list_entry_size; + if (is_llq_max_tx_burst_exists(io_sq)) { + if (unlikely(!io_sq->entries_in_tx_burst_left)) { + pr_err("Error: trying to send more packets than tx burst allows\n"); + return -ENOSPC; + } + + io_sq->entries_in_tx_burst_left--; + pr_debug("decreasing entries_in_tx_burst_left of queue %d to %d\n", + io_sq->qid, io_sq->entries_in_tx_burst_left); + } + /* Make sure everything was written into the bounce buffer before * writing the bounce buffer to the device */ @@ -100,7 +111,7 @@ static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq return 0; } -static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, +static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, u8 *header_src, u16 header_len) { @@ -131,7 +142,7 @@ static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, return 0; } -static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) +static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) { struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; u8 *bounce_buffer; @@ -151,7 +162,7 @@ static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) return sq_desc; } -static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) +static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) { struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; struct ena_com_llq_info *llq_info = &io_sq->llq_info; @@ -178,7 +189,7 @@ static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) return 0; } -static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +static void *get_sq_desc(struct ena_com_io_sq *io_sq) { if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) return get_sq_desc_llq(io_sq); @@ -186,7 +197,7 @@ static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) return get_sq_desc_regular_queue(io_sq); } -static inline int ena_com_sq_update_llq_tail(struct 
ena_com_io_sq *io_sq) +static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) { struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; struct ena_com_llq_info *llq_info = &io_sq->llq_info; @@ -214,7 +225,7 @@ static inline int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) return 0; } -static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +static int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) { if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) return ena_com_sq_update_llq_tail(io_sq); @@ -228,7 +239,7 @@ static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) return 0; } -static inline struct ena_eth_io_rx_cdesc_base * +static struct ena_eth_io_rx_cdesc_base * ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx) { idx &= (io_cq->q_depth - 1); @@ -237,7 +248,7 @@ static inline struct ena_eth_io_rx_cdesc_base * idx * io_cq->cdesc_entry_size_in_bytes); } -static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, +static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, u16 *first_cdesc_idx) { struct ena_eth_io_rx_cdesc_base *cdesc; @@ -274,24 +285,7 @@ static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, return count; } -static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, - struct ena_com_tx_ctx *ena_tx_ctx) -{ - int rc; - - if (ena_tx_ctx->meta_valid) { - rc = memcmp(&io_sq->cached_tx_meta, - &ena_tx_ctx->ena_meta, - sizeof(struct ena_com_tx_meta)); - - if (unlikely(rc != 0)) - return true; - } - - return false; -} - -static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, +static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, struct ena_com_tx_ctx *ena_tx_ctx) { struct ena_eth_io_tx_meta_desc *meta_desc = NULL; @@ -340,7 +334,7 @@ static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io return ena_com_sq_update_tail(io_sq); } -static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, +static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, struct ena_eth_io_rx_cdesc_base *cdesc) { ena_rx_ctx->l3_proto = cdesc->status & diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 340d02b64ca6..77986c0ea52c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -125,8 +125,55 @@ static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq, return ena_com_free_desc(io_sq) > temp; } +static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + if (!ena_tx_ctx->meta_valid) + return false; + + return !!memcmp(&io_sq->cached_tx_meta, + &ena_tx_ctx->ena_meta, + sizeof(struct ena_com_tx_meta)); +} + +static inline bool is_llq_max_tx_burst_exists(struct ena_com_io_sq *io_sq) +{ + return (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) && + io_sq->llq_info.max_entries_in_tx_burst > 0; +} + +static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_com_llq_info *llq_info; + int descs_after_first_entry; + int num_entries_needed = 1; + u16 num_descs; + + if (!is_llq_max_tx_burst_exists(io_sq)) + return false; + + llq_info = &io_sq->llq_info; + num_descs = ena_tx_ctx->num_bufs; + + if (unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx))) + ++num_descs; + + if (num_descs > 
llq_info->descs_num_before_header) { + descs_after_first_entry = num_descs - llq_info->descs_num_before_header; + num_entries_needed += DIV_ROUND_UP(descs_after_first_entry, + llq_info->descs_per_entry); + } + + pr_debug("queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, + num_descs, num_entries_needed); + + return num_entries_needed > io_sq->entries_in_tx_burst_left; +} + static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) { + u16 max_entries_in_tx_burst = io_sq->llq_info.max_entries_in_tx_burst; u16 tail = io_sq->tail; pr_debug("write submission queue doorbell for queue: %d tail: %d\n", @@ -134,6 +181,12 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) writel(tail, io_sq->db_addr); + if (is_llq_max_tx_burst_exists(io_sq)) { + pr_debug("reset available entries in tx burst for queue %d to %d\n", + io_sq->qid, max_entries_in_tx_burst); + io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst; + } + return 0; } @@ -142,15 +195,17 @@ static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq) u16 unreported_comp, head; bool need_update; - head = io_cq->head; - unreported_comp = head - io_cq->last_head_update; - need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); - - if (io_cq->cq_head_db_reg && need_update) { - pr_debug("Write completion queue doorbell for queue %d: head: %d\n", - io_cq->qid, head); - writel(head, io_cq->cq_head_db_reg); - io_cq->last_head_update = head; + if (unlikely(io_cq->cq_head_db_reg)) { + head = io_cq->head; + unreported_comp = head - io_cq->last_head_update; + need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); + + if (unlikely(need_update)) { + pr_debug("Write completion queue doorbell for queue %d: head: %d\n", + io_cq->qid, head); + writel(head, io_cq->cq_head_db_reg); + io_cq->last_head_update = head; + } } return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index fe596bc30a96..b997c3ce9e2b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -88,13 +88,14 @@ static const struct ena_stats ena_stats_tx_strings[] = { static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(cnt), ENA_STAT_RX_ENTRY(bytes), + ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(csum_good), ENA_STAT_RX_ENTRY(refil_partial), ENA_STAT_RX_ENTRY(bad_csum), ENA_STAT_RX_ENTRY(page_alloc_fail), ENA_STAT_RX_ENTRY(skb_alloc_fail), ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), - ENA_STAT_RX_ENTRY(rx_copybreak_pkt), ENA_STAT_RX_ENTRY(bad_req_id), ENA_STAT_RX_ENTRY(empty_rx_ring), ENA_STAT_RX_ENTRY(csum_unchecked), @@ -447,13 +448,32 @@ static void ena_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { struct ena_adapter *adapter = netdev_priv(netdev); - struct ena_ring *tx_ring = &adapter->tx_ring[0]; - struct ena_ring *rx_ring = &adapter->rx_ring[0]; - ring->rx_max_pending = rx_ring->ring_size; - ring->tx_max_pending = tx_ring->ring_size; - ring->rx_pending = rx_ring->ring_size; - ring->tx_pending = tx_ring->ring_size; + ring->tx_max_pending = adapter->max_tx_ring_size; + ring->rx_max_pending = adapter->max_rx_ring_size; + ring->tx_pending = adapter->tx_ring[0].ring_size; + ring->rx_pending = adapter->rx_ring[0].ring_size; +} + +static int ena_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + u32 
new_tx_size, new_rx_size; + + new_tx_size = ring->tx_pending < ENA_MIN_RING_SIZE ? + ENA_MIN_RING_SIZE : ring->tx_pending; + new_tx_size = rounddown_pow_of_two(new_tx_size); + + new_rx_size = ring->rx_pending < ENA_MIN_RING_SIZE ? + ENA_MIN_RING_SIZE : ring->rx_pending; + new_rx_size = rounddown_pow_of_two(new_rx_size); + + if (new_tx_size == adapter->requested_tx_ring_size && + new_rx_size == adapter->requested_rx_ring_size) + return 0; + + return ena_update_queue_sizes(adapter, new_tx_size, new_rx_size); } static u32 ena_flow_hash_to_flow_type(u16 hash_fields) @@ -807,6 +827,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_coalesce = ena_get_coalesce, .set_coalesce = ena_set_coalesce, .get_ringparam = ena_get_ringparam, + .set_ringparam = ena_set_ringparam, .get_sset_count = ena_get_sset_count, .get_strings = ena_get_strings, .get_ethtool_stats = ena_get_ethtool_stats, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 9c83642922c7..664e3ed97ea9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -182,7 +182,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_init_io_rings_common(adapter, rxr, i); /* TX specific ring state */ - txr->ring_size = adapter->tx_ring_size; + txr->ring_size = adapter->requested_tx_ring_size; txr->tx_max_header_size = ena_dev->tx_max_header_size; txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; txr->sgl_size = adapter->max_tx_sgl_size; @@ -190,7 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); /* RX specific ring state */ - rxr->ring_size = adapter->rx_ring_size; + rxr->ring_size = adapter->requested_rx_ring_size; rxr->rx_copybreak = adapter->rx_copybreak; rxr->sgl_size = adapter->max_rx_sgl_size; rxr->smoothed_interval = @@ -228,11 +228,11 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) } size = sizeof(u16) * tx_ring->ring_size; - tx_ring->free_tx_ids = vzalloc_node(size, node); - if (!tx_ring->free_tx_ids) { - tx_ring->free_tx_ids = vzalloc(size); - if (!tx_ring->free_tx_ids) - goto err_free_tx_ids; + tx_ring->free_ids = vzalloc_node(size, node); + if (!tx_ring->free_ids) { + tx_ring->free_ids = vzalloc(size); + if (!tx_ring->free_ids) + goto err_tx_free_ids; } size = tx_ring->tx_max_header_size; @@ -245,7 +245,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) /* Req id ring for TX out of order completions */ for (i = 0; i < tx_ring->ring_size; i++) - tx_ring->free_tx_ids[i] = i; + tx_ring->free_ids[i] = i; /* Reset tx statistics */ memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); @@ -256,9 +256,9 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) return 0; err_push_buf_intermediate_buf: - vfree(tx_ring->free_tx_ids); - tx_ring->free_tx_ids = NULL; -err_free_tx_ids: + vfree(tx_ring->free_ids); + tx_ring->free_ids = NULL; +err_tx_free_ids: vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; err_tx_buffer_info: @@ -278,8 +278,8 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; - vfree(tx_ring->free_tx_ids); - tx_ring->free_tx_ids = NULL; + vfree(tx_ring->free_ids); + tx_ring->free_ids = NULL; vfree(tx_ring->push_buf_intermediate_buf); tx_ring->push_buf_intermediate_buf = NULL; @@ -326,7 +326,7 @@ static void ena_free_all_io_tx_resources(struct 
ena_adapter *adapter) ena_free_tx_resources(adapter, i); } -static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) +static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) { if (likely(req_id < rx_ring->ring_size)) return 0; @@ -377,10 +377,10 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, } size = sizeof(u16) * rx_ring->ring_size; - rx_ring->free_rx_ids = vzalloc_node(size, node); - if (!rx_ring->free_rx_ids) { - rx_ring->free_rx_ids = vzalloc(size); - if (!rx_ring->free_rx_ids) { + rx_ring->free_ids = vzalloc_node(size, node); + if (!rx_ring->free_ids) { + rx_ring->free_ids = vzalloc(size); + if (!rx_ring->free_ids) { vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; return -ENOMEM; @@ -389,7 +389,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, /* Req id ring for receiving RX pkts out of order */ for (i = 0; i < rx_ring->ring_size; i++) - rx_ring->free_rx_ids[i] = i; + rx_ring->free_ids[i] = i; /* Reset rx statistics */ memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); @@ -415,8 +415,8 @@ static void ena_free_rx_resources(struct ena_adapter *adapter, vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; - vfree(rx_ring->free_rx_ids); - rx_ring->free_rx_ids = NULL; + vfree(rx_ring->free_ids); + rx_ring->free_ids = NULL; } /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues @@ -460,7 +460,7 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) ena_free_rx_resources(adapter, i); } -static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, +static int ena_alloc_rx_page(struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info, gfp_t gfp) { struct ena_com_buf *ena_buf; @@ -531,7 +531,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) for (i = 0; i < num; i++) { struct ena_rx_buffer *rx_info; - req_id = rx_ring->free_rx_ids[next_to_use]; + req_id = rx_ring->free_ids[next_to_use]; rc = validate_rx_req_id(rx_ring, req_id); if (unlikely(rc < 0)) break; @@ -594,7 +594,6 @@ static void ena_free_rx_bufs(struct ena_adapter *adapter, /* ena_refill_all_rx_bufs - allocate all queues Rx buffers * @adapter: board private structure - * */ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) { @@ -621,7 +620,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static inline void ena_unmap_tx_skb(struct ena_ring *tx_ring, +static void ena_unmap_tx_skb(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; @@ -797,7 +796,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) tx_pkts++; total_done += tx_info->tx_descs; - tx_ring->free_tx_ids[next_to_clean] = req_id; + tx_ring->free_ids[next_to_clean] = req_id; next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, tx_ring->ring_size); } @@ -911,7 +910,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, skb_put(skb, len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); - rx_ring->free_rx_ids[*next_to_clean] = req_id; + rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, rx_ring->ring_size); return skb; @@ -935,7 +934,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info->page = NULL; - rx_ring->free_rx_ids[*next_to_clean] = req_id; + rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_NEXT(*next_to_clean, rx_ring->ring_size); @@ -956,7 +955,7 @@ static struct sk_buff 
*ena_rx_skb(struct ena_ring *rx_ring, * @ena_rx_ctx: received packet context/metadata * @skb: skb currently being received and modified */ -static inline void ena_rx_checksum(struct ena_ring *rx_ring, +static void ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, struct sk_buff *skb) { @@ -1001,6 +1000,9 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring, if (likely(ena_rx_ctx->l4_csum_checked)) { skb->ip_summed = CHECKSUM_UNNECESSARY; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.csum_good++; + u64_stats_update_end(&rx_ring->syncp); } else { u64_stats_update_begin(&rx_ring->syncp); rx_ring->rx_stats.csum_unchecked++; @@ -1088,7 +1090,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* exit if we failed to retrieve a buffer */ if (unlikely(!skb)) { for (i = 0; i < ena_rx_ctx.descs; i++) { - rx_ring->free_tx_ids[next_to_clean] = + rx_ring->free_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; next_to_clean = ENA_RX_RING_IDX_NEXT(next_to_clean, @@ -1153,7 +1155,7 @@ error: return 0; } -inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, +void ena_adjust_intr_moderation(struct ena_ring *rx_ring, struct ena_ring *tx_ring) { /* We apply adaptive moderation on Rx path only. @@ -1172,7 +1174,7 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, rx_ring->per_napi_bytes = 0; } -static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, +static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { struct ena_eth_io_intr_reg intr_reg; @@ -1192,7 +1194,7 @@ static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); } -static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, +static void ena_update_ring_numa_node(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { int cpu = get_cpu(); @@ -1635,7 +1637,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) ctx.qid = ena_qid; ctx.mem_queue_type = ena_dev->tx_mem_queue_type; ctx.msix_vector = msix_vector; - ctx.queue_size = adapter->tx_ring_size; + ctx.queue_size = tx_ring->ring_size; ctx.numa_node = cpu_to_node(tx_ring->cpu); rc = ena_com_create_io_queue(ena_dev, &ctx); @@ -1702,7 +1704,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ctx.msix_vector = msix_vector; - ctx.queue_size = adapter->rx_ring_size; + ctx.queue_size = rx_ring->ring_size; ctx.numa_node = cpu_to_node(rx_ring->cpu); rc = ena_com_create_io_queue(ena_dev, &ctx); @@ -1749,6 +1751,112 @@ create_err: return rc; } +static void set_io_rings_size(struct ena_adapter *adapter, + int new_tx_size, int new_rx_size) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) { + adapter->tx_ring[i].ring_size = new_tx_size; + adapter->rx_ring[i].ring_size = new_rx_size; + } +} + +/* This function allows queue allocation to backoff when the system is + * low on memory. If there is not enough memory to allocate io queues + * the driver will try to allocate smaller queues. + * + * The backoff algorithm is as follows: + * 1. Try to allocate TX and RX and if successful. + * 1.1. return success + * + * 2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). + * + * 3. If TX or RX is smaller than 256 + * 3.1. return failure. + * 4. else + * 4.1. go back to 1. 
+ */ +static int create_queues_with_size_backoff(struct ena_adapter *adapter) +{ + int rc, cur_rx_ring_size, cur_tx_ring_size; + int new_rx_ring_size, new_tx_ring_size; + + /* current queue sizes might be set to smaller than the requested + * ones due to past queue allocation failures. + */ + set_io_rings_size(adapter, adapter->requested_tx_ring_size, + adapter->requested_rx_ring_size); + + while (1) { + rc = ena_setup_all_tx_resources(adapter); + if (rc) + goto err_setup_tx; + + rc = ena_create_all_io_tx_queues(adapter); + if (rc) + goto err_create_tx_queues; + + rc = ena_setup_all_rx_resources(adapter); + if (rc) + goto err_setup_rx; + + rc = ena_create_all_io_rx_queues(adapter); + if (rc) + goto err_create_rx_queues; + + return 0; + +err_create_rx_queues: + ena_free_all_io_rx_resources(adapter); +err_setup_rx: + ena_destroy_all_tx_queues(adapter); +err_create_tx_queues: + ena_free_all_io_tx_resources(adapter); +err_setup_tx: + if (rc != -ENOMEM) { + netif_err(adapter, ifup, adapter->netdev, + "Queue creation failed with error code %d\n", + rc); + return rc; + } + + cur_tx_ring_size = adapter->tx_ring[0].ring_size; + cur_rx_ring_size = adapter->rx_ring[0].ring_size; + + netif_err(adapter, ifup, adapter->netdev, + "Not enough memory to create queues with sizes TX=%d, RX=%d\n", + cur_tx_ring_size, cur_rx_ring_size); + + new_tx_ring_size = cur_tx_ring_size; + new_rx_ring_size = cur_rx_ring_size; + + /* Decrease the size of the larger queue, or + * decrease both if they are the same size. + */ + if (cur_rx_ring_size <= cur_tx_ring_size) + new_tx_ring_size = cur_tx_ring_size / 2; + if (cur_rx_ring_size >= cur_tx_ring_size) + new_rx_ring_size = cur_rx_ring_size / 2; + + if (new_tx_ring_size < ENA_MIN_RING_SIZE || + new_rx_ring_size < ENA_MIN_RING_SIZE) { + netif_err(adapter, ifup, adapter->netdev, + "Queue creation failed with the smallest possible queue size of %d for both queues. 
Not retrying with smaller queues\n", + ENA_MIN_RING_SIZE); + return rc; + } + + netif_err(adapter, ifup, adapter->netdev, + "Retrying queue creation with sizes TX=%d, RX=%d\n", + new_tx_ring_size, + new_rx_ring_size); + + set_io_rings_size(adapter, new_tx_ring_size, + new_rx_ring_size); + } +} + static int ena_up(struct ena_adapter *adapter) { int rc, i; @@ -1768,25 +1876,9 @@ static int ena_up(struct ena_adapter *adapter) if (rc) goto err_req_irq; - /* allocate transmit descriptors */ - rc = ena_setup_all_tx_resources(adapter); + rc = create_queues_with_size_backoff(adapter); if (rc) - goto err_setup_tx; - - /* allocate receive descriptors */ - rc = ena_setup_all_rx_resources(adapter); - if (rc) - goto err_setup_rx; - - /* Create TX queues */ - rc = ena_create_all_io_tx_queues(adapter); - if (rc) - goto err_create_tx_queues; - - /* Create RX queues */ - rc = ena_create_all_io_rx_queues(adapter); - if (rc) - goto err_create_rx_queues; + goto err_create_queues_with_backoff; rc = ena_up_complete(adapter); if (rc) @@ -1815,14 +1907,11 @@ static int ena_up(struct ena_adapter *adapter) return rc; err_up: - ena_destroy_all_rx_queues(adapter); -err_create_rx_queues: ena_destroy_all_tx_queues(adapter); -err_create_tx_queues: - ena_free_all_io_rx_resources(adapter); -err_setup_rx: ena_free_all_io_tx_resources(adapter); -err_setup_tx: + ena_destroy_all_rx_queues(adapter); + ena_free_all_io_rx_resources(adapter); +err_create_queues_with_backoff: ena_free_io_irq(adapter); err_req_irq: ena_del_napi(adapter); @@ -1942,6 +2031,20 @@ static int ena_close(struct net_device *netdev) return 0; } +int ena_update_queue_sizes(struct ena_adapter *adapter, + u32 new_tx_size, + u32 new_rx_size) +{ + bool dev_up; + + dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_close(adapter->netdev); + adapter->requested_tx_ring_size = new_tx_size; + adapter->requested_rx_ring_size = new_rx_size; + ena_init_io_rings(adapter); + return dev_up ? 
ena_up(adapter) : 0; +} + static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) { u32 mss = skb_shinfo(skb)->gso_size; @@ -2152,7 +2255,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); next_to_use = tx_ring->next_to_use; - req_id = tx_ring->free_tx_ids[next_to_use]; + req_id = tx_ring->free_ids[next_to_use]; tx_info = &tx_ring->tx_buffer_info[req_id]; tx_info->num_of_bufs = 0; @@ -2172,6 +2275,13 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set flags and meta data */ ena_tx_csum(&ena_tx_ctx, skb); + if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) { + netif_dbg(adapter, tx_queued, dev, + "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", + qid); + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + } + /* prepare the packet's descriptors to dma engine */ rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, &nb_hw_desc); @@ -2447,13 +2557,6 @@ static int ena_device_validate_params(struct ena_adapter *adapter, return -EINVAL; } - if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) || - (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) { - netif_err(adapter, drv, netdev, - "Error, device doesn't support enough queues\n"); - return -EINVAL; - } - if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { netif_err(adapter, drv, netdev, "Error, device max mtu is smaller than netdev MTU\n"); @@ -3027,18 +3130,32 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, struct ena_com_dev *ena_dev, struct ena_com_dev_get_features_ctx *get_feat_ctx) { - int io_sq_num, io_queue_num; + int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num; - /* In case of LLQ use the llq number in the get feature cmd */ + if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + struct ena_admin_queue_ext_feature_fields *max_queue_ext = + &get_feat_ctx->max_queue_ext.max_queue_ext; + io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num, + max_queue_ext->max_rx_cq_num); + + io_tx_sq_num = max_queue_ext->max_tx_sq_num; + io_tx_cq_num = max_queue_ext->max_tx_cq_num; + } else { + struct ena_admin_queue_feature_desc *max_queues = + &get_feat_ctx->max_queues; + io_tx_sq_num = max_queues->max_sq_num; + io_tx_cq_num = max_queues->max_cq_num; + io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); + } + + /* In case of LLQ use the llq fields for the tx SQ/CQ */ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) - io_sq_num = get_feat_ctx->llq.max_llq_num; - else - io_sq_num = get_feat_ctx->max_queues.max_sq_num; + io_tx_sq_num = get_feat_ctx->llq.max_llq_num; io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); - io_queue_num = min_t(int, io_queue_num, io_sq_num); - io_queue_num = min_t(int, io_queue_num, - get_feat_ctx->max_queues.max_cq_num); + io_queue_num = min_t(int, io_queue_num, io_rx_num); + io_queue_num = min_t(int, io_queue_num, io_tx_sq_num); + io_queue_num = min_t(int, io_queue_num, io_tx_cq_num); /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); if (unlikely(!io_queue_num)) { @@ -3212,7 +3329,7 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) pci_release_selected_regions(pdev, release_bars); } -static inline void set_default_llq_configurations(struct ena_llq_configurations *llq_config) +static void set_default_llq_configurations(struct 
ena_llq_configurations *llq_config) { llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; @@ -3221,36 +3338,70 @@ static inline void set_default_llq_configurations(struct ena_llq_configurations llq_config->llq_ring_entry_size_value = 128; } -static int ena_calc_queue_size(struct pci_dev *pdev, - struct ena_com_dev *ena_dev, - u16 *max_tx_sgl_size, - u16 *max_rx_sgl_size, - struct ena_com_dev_get_features_ctx *get_feat_ctx) +static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx) { - u32 queue_size = ENA_DEFAULT_RING_SIZE; + struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; + struct ena_com_dev *ena_dev = ctx->ena_dev; + u32 tx_queue_size = ENA_DEFAULT_RING_SIZE; + u32 rx_queue_size = ENA_DEFAULT_RING_SIZE; + u32 max_tx_queue_size; + u32 max_rx_queue_size; - queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_cq_depth); - queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_sq_depth); + if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + struct ena_admin_queue_ext_feature_fields *max_queue_ext = + &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; + max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, + max_queue_ext->max_rx_sq_depth); + max_tx_queue_size = max_queue_ext->max_tx_cq_depth; - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) - queue_size = min_t(u32, queue_size, - get_feat_ctx->llq.max_llq_depth); + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + max_tx_queue_size = min_t(u32, max_tx_queue_size, + llq->max_llq_depth); + else + max_tx_queue_size = min_t(u32, max_tx_queue_size, + max_queue_ext->max_tx_sq_depth); - queue_size = rounddown_pow_of_two(queue_size); + ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queue_ext->max_per_packet_tx_descs); + ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queue_ext->max_per_packet_rx_descs); + } else { + struct ena_admin_queue_feature_desc *max_queues = + &ctx->get_feat_ctx->max_queues; + max_rx_queue_size = min_t(u32, max_queues->max_cq_depth, + max_queues->max_sq_depth); + max_tx_queue_size = max_queues->max_cq_depth; + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + max_tx_queue_size = min_t(u32, max_tx_queue_size, + llq->max_llq_depth); + else + max_tx_queue_size = min_t(u32, max_tx_queue_size, + max_queues->max_sq_depth); - if (unlikely(!queue_size)) { - dev_err(&pdev->dev, "Invalid queue size\n"); - return -EFAULT; + ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queues->max_packet_tx_descs); + ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queues->max_packet_rx_descs); } - *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - get_feat_ctx->max_queues.max_packet_tx_descs); - *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - get_feat_ctx->max_queues.max_packet_rx_descs); + max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); + max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); + + tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, + max_tx_queue_size); + rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, + max_rx_queue_size); - return queue_size; + tx_queue_size = rounddown_pow_of_two(tx_queue_size); + rx_queue_size = rounddown_pow_of_two(rx_queue_size); + + ctx->max_tx_queue_size = max_tx_queue_size; + ctx->max_rx_queue_size = max_rx_queue_size; + ctx->tx_queue_size = tx_queue_size; + ctx->rx_queue_size = rx_queue_size; + + return 0; } /* 
ena_probe - Device Initialization Routine @@ -3266,23 +3417,19 @@ static int ena_calc_queue_size(struct pci_dev *pdev, static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct ena_com_dev_get_features_ctx get_feat_ctx; - static int version_printed; - struct net_device *netdev; - struct ena_adapter *adapter; + struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; struct ena_llq_configurations llq_config; struct ena_com_dev *ena_dev = NULL; - char *queue_type_str; - static int adapters_found; + struct ena_adapter *adapter; int io_queue_num, bars, rc; - int queue_size; - u16 tx_sgl_size = 0; - u16 rx_sgl_size = 0; + struct net_device *netdev; + static int adapters_found; + char *queue_type_str; bool wd_state; dev_dbg(&pdev->dev, "%s\n", __func__); - if (version_printed++ == 0) - dev_info(&pdev->dev, "%s", version); + dev_info_once(&pdev->dev, "%s", version); rc = pci_enable_device_mem(pdev); if (rc) { @@ -3334,20 +3481,25 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_device_destroy; } + calc_queue_ctx.ena_dev = ena_dev; + calc_queue_ctx.get_feat_ctx = &get_feat_ctx; + calc_queue_ctx.pdev = pdev; + /* initial Tx interrupt delay, Assumes 1 usec granularity. * Updated during device initialization with the real granularity */ ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); - queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size, - &rx_sgl_size, &get_feat_ctx); - if ((queue_size <= 0) || (io_queue_num <= 0)) { + rc = ena_calc_queue_size(&calc_queue_ctx); + if (rc || io_queue_num <= 0) { rc = -EFAULT; goto err_device_destroy; } - dev_info(&pdev->dev, "creating %d io queues. queue size: %d. LLQ is %s\n", - io_queue_num, queue_size, + dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size: %d LLQ is %s\n", + io_queue_num, + calc_queue_ctx.rx_queue_size, + calc_queue_ctx.tx_queue_size, + (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ? 
"ENABLED" : "DISABLED"); @@ -3373,11 +3525,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); adapter->reset_reason = ENA_REGS_RESET_NORMAL; - adapter->tx_ring_size = queue_size; - adapter->rx_ring_size = queue_size; - - adapter->max_tx_sgl_size = tx_sgl_size; - adapter->max_rx_sgl_size = rx_sgl_size; + adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; + adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; + adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; + adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; + adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; + adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; adapter->num_queues = io_queue_num; adapter->last_monitored_tx_qid = 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 63870072cbbd..efbcffd22215 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -44,8 +44,8 @@ #include "ena_eth_com.h" #define DRV_MODULE_VER_MAJOR 2 -#define DRV_MODULE_VER_MINOR 0 -#define DRV_MODULE_VER_SUBMINOR 3 +#define DRV_MODULE_VER_MINOR 1 +#define DRV_MODULE_VER_SUBMINOR 0 #define DRV_MODULE_NAME "ena" #ifndef DRV_MODULE_VERSION @@ -79,6 +79,7 @@ #define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) #define ENA_DEFAULT_RING_SIZE (1024) +#define ENA_MIN_RING_SIZE (256) #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) #define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN) @@ -154,6 +155,18 @@ struct ena_napi { u32 qid; }; +struct ena_calc_queue_size_ctx { + struct ena_com_dev_get_features_ctx *get_feat_ctx; + struct ena_com_dev *ena_dev; + struct pci_dev *pdev; + u16 tx_queue_size; + u16 rx_queue_size; + u16 max_tx_queue_size; + u16 max_rx_queue_size; + u16 max_tx_sgl_size; + u16 max_rx_sgl_size; +}; + struct ena_tx_buffer { struct sk_buff *skb; /* num of ena desc for this specific skb @@ -208,26 +221,24 @@ struct ena_stats_tx { struct ena_stats_rx { u64 cnt; u64 bytes; + u64 rx_copybreak_pkt; + u64 csum_good; u64 refil_partial; u64 bad_csum; u64 page_alloc_fail; u64 skb_alloc_fail; u64 dma_mapping_err; u64 bad_desc_num; - u64 rx_copybreak_pkt; u64 bad_req_id; u64 empty_rx_ring; u64 csum_unchecked; }; struct ena_ring { - union { - /* Holds the empty requests for TX/RX - * out of order completions - */ - u16 *free_tx_ids; - u16 *free_rx_ids; - }; + /* Holds the empty requests for TX/RX + * out of order completions + */ + u16 *free_ids; union { struct ena_tx_buffer *tx_buffer_info; @@ -321,8 +332,11 @@ struct ena_adapter { u32 tx_usecs, rx_usecs; /* interrupt moderation */ u32 tx_frames, rx_frames; /* interrupt moderation */ - u32 tx_ring_size; - u32 rx_ring_size; + u32 requested_tx_ring_size; + u32 requested_rx_ring_size; + + u32 max_tx_ring_size; + u32 max_rx_ring_size; u32 msg_enable; @@ -372,6 +386,10 @@ void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); +int ena_update_queue_sizes(struct ena_adapter *adapter, + u32 new_tx_size, + u32 new_rx_size); + int ena_get_sset_count(struct net_device *netdev, int sset); #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 173be45463ee..02f1b70c4e25 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -9,6 +9,8 
@@ #ifndef AQ_CFG_H #define AQ_CFG_H +#include <generated/utsrelease.h> + #define AQ_CFG_VECS_DEF 8U #define AQ_CFG_TCS_DEF 1U @@ -86,10 +88,7 @@ #define AQ_CFG_DRV_AUTHOR "aQuantia" #define AQ_CFG_DRV_DESC "aQuantia Corporation(R) Network Driver" #define AQ_CFG_DRV_NAME "atlantic" -#define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\ - __stringify(NIC_MINOR_DRIVER_VERSION)"."\ - __stringify(NIC_BUILD_DRIVER_VERSION)"."\ - __stringify(NIC_REVISION_DRIVER_VERSION) \ +#define AQ_CFG_DRV_VERSION UTS_RELEASE \ AQ_CFG_DRV_VERSION_SUFFIX #endif /* AQ_CFG_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c index adad6a7acabe..6da65099047d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2014-2019 aQuantia Corporation. */ /* File aq_drvinfo.c: Definition of common code for firmware info in sys.*/ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h index 41fbb1358068..23a0487893a7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (C) 2014-2017 aQuantia Corporation. */ /* File aq_drvinfo.h: Declaration of common code for firmware info in sys.*/ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 1fff462a4175..440690b18734 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2014-2017 aQuantia Corporation. */ /* File aq_filters.c: RX filters related functions. */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.h b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h index c6a08c6585d5..122e06c88a33 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (C) 2014-2017 aQuantia Corporation. */ /* File aq_filters.h: RX filters related functions. 
*/ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 5315df5ff6f8..100722ad5c2d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -108,11 +108,16 @@ err_exit: static int aq_ndev_set_features(struct net_device *ndev, netdev_features_t features) { + bool is_vlan_rx_strip = !!(features & NETIF_F_HW_VLAN_CTAG_RX); + bool is_vlan_tx_insert = !!(features & NETIF_F_HW_VLAN_CTAG_TX); struct aq_nic_s *aq_nic = netdev_priv(ndev); - struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic); + bool need_ndev_restart = false; + struct aq_nic_cfg_s *aq_cfg; bool is_lro = false; int err = 0; + aq_cfg = aq_nic_get_cfg(aq_nic); + if (!(features & NETIF_F_NTUPLE)) { if (aq_nic->ndev->features & NETIF_F_NTUPLE) { err = aq_clear_rxnfc_all_rules(aq_nic); @@ -135,17 +140,32 @@ static int aq_ndev_set_features(struct net_device *ndev, if (aq_cfg->is_lro != is_lro) { aq_cfg->is_lro = is_lro; - - if (netif_running(ndev)) { - aq_ndev_close(ndev); - aq_ndev_open(ndev); - } + need_ndev_restart = true; } } - if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) + + if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) { err = aq_nic->aq_hw_ops->hw_set_offload(aq_nic->aq_hw, aq_cfg); + if (unlikely(err)) + goto err_exit; + } + + if (aq_cfg->is_vlan_rx_strip != is_vlan_rx_strip) { + aq_cfg->is_vlan_rx_strip = is_vlan_rx_strip; + need_ndev_restart = true; + } + if (aq_cfg->is_vlan_tx_insert != is_vlan_tx_insert) { + aq_cfg->is_vlan_tx_insert = is_vlan_tx_insert; + need_ndev_restart = true; + } + + if (need_ndev_restart && netif_running(ndev)) { + aq_ndev_close(ndev); + aq_ndev_open(ndev); + } + err_exit: return err; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 41172fbebddd..e1392766e21e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -126,6 +126,8 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; cfg->features = cfg->aq_hw_caps->hw_features; + cfg->is_vlan_rx_strip = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_RX); + cfg->is_vlan_tx_insert = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_TX); cfg->is_vlan_force_promisc = true; } @@ -286,7 +288,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->hw_features |= aq_hw_caps->hw_features; self->ndev->features = aq_hw_caps->hw_features; self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM | - NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO; + NETIF_F_RXHASH | NETIF_F_SG | + NETIF_F_LRO | NETIF_F_TSO; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -427,26 +430,37 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, unsigned int dx = ring->sw_tail; struct aq_ring_buff_s *first = NULL; struct aq_ring_buff_s *dx_buff = &ring->buff_ring[dx]; + bool need_context_tag = false; + + dx_buff->flags = 0U; if (unlikely(skb_is_gso(skb))) { - dx_buff->flags = 0U; + dx_buff->mss = skb_shinfo(skb)->gso_size; + dx_buff->is_gso = 1U; dx_buff->len_pkt = skb->len; dx_buff->len_l2 = ETH_HLEN; dx_buff->len_l3 = ip_hdrlen(skb); dx_buff->len_l4 = tcp_hdrlen(skb); - dx_buff->mss = skb_shinfo(skb)->gso_size; - dx_buff->is_txc = 1U; dx_buff->eop_index = 0xffffU; - dx_buff->is_ipv6 = (ip_hdr(skb)->version == 6) ? 
1U : 0U; + need_context_tag = true; + } + + if (self->aq_nic_cfg.is_vlan_tx_insert && skb_vlan_tag_present(skb)) { + dx_buff->vlan_tx_tag = skb_vlan_tag_get(skb); + dx_buff->len_pkt = skb->len; + dx_buff->is_vlan = 1U; + need_context_tag = true; + } + if (need_context_tag) { dx = aq_ring_next_dx(ring, dx); dx_buff = &ring->buff_ring[dx]; + dx_buff->flags = 0U; ++ret; } - dx_buff->flags = 0U; dx_buff->len = skb_headlen(skb); dx_buff->pa = dma_map_single(aq_nic_get_dev(self), skb->data, @@ -535,7 +549,7 @@ mapping_error: --ret, dx = aq_ring_next_dx(ring, dx)) { dx_buff = &ring->buff_ring[dx]; - if (!dx_buff->is_txc && dx_buff->pa) { + if (!dx_buff->is_gso && !dx_buff->is_vlan && dx_buff->pa) { if (unlikely(dx_buff->is_sop)) { dma_unmap_single(aq_nic_get_dev(self), dx_buff->pa, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 0f22f5d5691b..255b54a6ae07 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -35,6 +35,8 @@ struct aq_nic_cfg_s { u32 flow_control; u32 link_speed_msk; u32 wol; + u8 is_vlan_rx_strip; + u8 is_vlan_tx_insert; bool is_vlan_force_promisc; u16 is_mc_list_enabled; u16 mc_list_count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 2a7b91ed17c5..3901d7994ca1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -409,6 +409,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } } + if (buff->is_vlan) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + buff->vlan_rx_tag); + skb->protocol = eth_type_trans(skb, ndev); aq_rx_checksum(self, buff, skb); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 6bd67210d0b7..47abd09d06c2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -27,7 +27,7 @@ struct aq_rxpage { * +----------+----------+----------+----------- * 4/8bytes|len pkt |len pkt | | skb * +----------+----------+----------+----------- - * 4/8bytes|is_txc |len,flags |len |len,is_eop + * 4/8bytes|is_gso |len,flags |len |len,is_eop * +----------+----------+----------+----------- * * This aq_ring_buff_s doesn't have endianness dependency. 
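[Editor's note: the remapped Tx path above lets LSO and hardware VLAN insertion share a single context descriptor: aq_nic_map_skb() raises need_context_tag for either feature and advances the ring only once, which is also why the unwind path now tests !is_gso && !is_vlan instead of the old !is_txc. A minimal sketch of that decision, using stand-in types rather than the driver's aq_ring_buff_s:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct txc {				/* stand-in for the context fields */
	bool is_gso, is_vlan;
	uint16_t mss, vlan_tag;
};

/* Returns how many extra descriptors the context costs: 0 or 1. */
static int fill_context(struct txc *c, bool gso, uint16_t mss,
			bool vlan, uint16_t tag)
{
	bool need = false;

	if (gso) {			/* LSO carries the MSS out of band */
		c->is_gso = true;
		c->mss = mss;
		need = true;
	}
	if (vlan) {			/* tag insertion reuses the same slot */
		c->is_vlan = true;
		c->vlan_tag = tag;
		need = true;
	}
	return need ? 1 : 0;
}

int main(void)
{
	struct txc c = { 0 };

	/* GSO and VLAN together still cost only one context descriptor */
	printf("%d\n", fill_context(&c, true, 1448, true, 100));
	return 0;
}
]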
@@ -44,6 +44,7 @@ struct __packed aq_ring_buff_s { u8 is_hash_l4; u8 rsvd1; struct aq_rxpage rxdata; + u16 vlan_rx_tag; }; /* EOP */ struct { @@ -59,6 +60,7 @@ struct __packed aq_ring_buff_s { u8 is_ipv6:1; u8 rsvd2:7; u32 len_pkt; + u16 vlan_tx_tag; }; }; union { @@ -70,11 +72,12 @@ struct __packed aq_ring_buff_s { u32 is_cso_err:1; u32 is_sop:1; u32 is_eop:1; - u32 is_txc:1; + u32 is_gso:1; u32 is_mapped:1; u32 is_cleaned:1; u32 is_error:1; - u32 rsvd3:6; + u32 is_vlan:1; + u32 rsvd3:5; u16 eop_index; u16 rsvd4; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 0f140a9fe404..359a4d387185 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -451,7 +451,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, buff = &ring->buff_ring[ring->sw_tail]; - if (buff->is_txc) { + if (buff->is_gso) { txd->ctl |= (buff->len_l3 << 31) | (buff->len_l2 << 24) | HW_ATL_A0_TXD_CTL_CMD_TCP | diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 13ac2661a473..30f7fc4c97ff 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -40,7 +40,9 @@ NETIF_F_TSO | \ NETIF_F_LRO | \ NETIF_F_NTUPLE | \ - NETIF_F_HW_VLAN_CTAG_FILTER, \ + NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX, \ .hw_priv_flags = IFF_UNICAST_FLT, \ .flow_control = true, \ .mtu = HW_ATL_B0_MTU_JUMBO, \ @@ -245,6 +247,9 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, /* LSO offloads*/ hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU); + /* Outer VLAN tag offload */ + hw_atl_rpo_outer_vlan_tag_mode_set(self, 1U); + /* LRO offloads */ { unsigned int val = (8U < HW_ATL_B0_LRO_RXD_MAX) ? 
0x3U : @@ -487,6 +492,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, unsigned int buff_pa_len = 0U; unsigned int pkt_len = 0U; unsigned int frag_count = 0U; + bool is_vlan = false; bool is_gso = false; buff = &ring->buff_ring[ring->sw_tail]; @@ -501,36 +507,44 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, buff = &ring->buff_ring[ring->sw_tail]; - if (buff->is_txc) { + if (buff->is_gso) { + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP; + txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC; txd->ctl |= (buff->len_l3 << 31) | - (buff->len_l2 << 24) | - HW_ATL_B0_TXD_CTL_CMD_TCP | - HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC; - txd->ctl2 |= (buff->mss << 16) | - (buff->len_l4 << 8) | - (buff->len_l3 >> 1); + (buff->len_l2 << 24); + txd->ctl2 |= (buff->mss << 16); + is_gso = true; pkt_len -= (buff->len_l4 + buff->len_l3 + buff->len_l2); - is_gso = true; - if (buff->is_ipv6) txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6; - } else { + txd->ctl2 |= (buff->len_l4 << 8) | + (buff->len_l3 >> 1); + } + if (buff->is_vlan) { + txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC; + txd->ctl |= buff->vlan_tx_tag << 4; + is_vlan = true; + } + if (!buff->is_gso && !buff->is_vlan) { buff_pa_len = buff->len; txd->buf_addr = buff->pa; txd->ctl |= (HW_ATL_B0_TXD_CTL_BLEN & ((u32)buff_pa_len << 4)); txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXD; + /* PAY_LEN */ txd->ctl2 |= HW_ATL_B0_TXD_CTL2_LEN & (pkt_len << 14); - if (is_gso) { - txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO; + if (is_gso || is_vlan) { + /* enable tx context */ txd->ctl2 |= HW_ATL_B0_TXD_CTL2_CTX_EN; } + if (is_gso) + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO; /* Tx checksum offloads */ if (buff->is_ip_cso) @@ -539,13 +553,16 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, if (buff->is_udp_cso || buff->is_tcp_cso) txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TUCSO; + if (is_vlan) + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_VLAN; + if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_B0_TXD_CTL_EOP; txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB; is_gso = false; + is_vlan = false; } } - ring->sw_tail = aq_ring_next_dx(ring, ring->sw_tail); } @@ -559,6 +576,7 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, { u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa; u32 dma_desc_addr_msw = (u32)(((u64)aq_ring->dx_ring_pa) >> 32); + u32 vlan_rx_stripping = self->aq_nic_cfg->is_vlan_rx_strip; hw_atl_rdm_rx_desc_en_set(self, false, aq_ring->idx); @@ -578,7 +596,8 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, hw_atl_rdm_rx_desc_head_buff_size_set(self, 0U, aq_ring->idx); hw_atl_rdm_rx_desc_head_splitting_set(self, 0U, aq_ring->idx); - hw_atl_rpo_rx_desc_vlan_stripping_set(self, 0U, aq_ring->idx); + hw_atl_rpo_rx_desc_vlan_stripping_set(self, !!vlan_rx_stripping, + aq_ring->idx); /* Rx ring set mode */ @@ -681,11 +700,15 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, buff = &ring->buff_ring[ring->hw_head]; + buff->flags = 0U; + buff->is_hash_l4 = 0U; + rx_stat = (0x0000003CU & rxd_wb->status) >> 2; is_rx_check_sum_enabled = (rxd_wb->type >> 19) & 0x3U; - pkt_type = 0xFFU & (rxd_wb->type >> 4); + pkt_type = (rxd_wb->type & HW_ATL_B0_RXD_WB_STAT_PKTTYPE) >> + HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT; if (is_rx_check_sum_enabled & BIT(0) && (0x0U == (pkt_type & 0x3U))) @@ -706,6 +729,13 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, buff->is_cso_err = 0U; } + if (self->aq_nic_cfg->is_vlan_rx_strip && + ((pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN) || + (pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE))) { + buff->is_vlan = 1; + 
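		/* the stripped outer tag is reported back in the Rx writeback descriptor */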
buff->vlan_rx_tag = le16_to_cpu(rxd_wb->vlan); + } + if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) { /* MAC error or DMA error */ buff->is_error = 1U; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index e4ba2ccf9830..808d8cd4252a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -107,10 +107,17 @@ #define HW_ATL_B0_RXD_NCEA0 (0x1) #define HW_ATL_B0_RXD_WB_STAT_RSSTYPE (0x0000000F) +#define HW_ATL_B0_RXD_WB_STAT_RSSTYPE_SHIFT (0x0) #define HW_ATL_B0_RXD_WB_STAT_PKTTYPE (0x00000FF0) +#define HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT (0x4) #define HW_ATL_B0_RXD_WB_STAT_RXCTRL (0x00180000) +#define HW_ATL_B0_RXD_WB_STAT_RXCTRL_SHIFT (0x13) #define HW_ATL_B0_RXD_WB_STAT_SPLHDR (0x00200000) #define HW_ATL_B0_RXD_WB_STAT_HDRLEN (0xFFC00000) +#define HW_ATL_B0_RXD_WB_STAT_HDRLEN_SHIFT (0x16) + +#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN BIT(5) +#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE BIT(6) #define HW_ATL_B0_RXD_WB_STAT2_DD (0x0001) #define HW_ATL_B0_RXD_WB_STAT2_EOP (0x0002) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index 451529069f28..1149812ae463 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -1004,6 +1004,22 @@ void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw, rx_desc_vlan_stripping); } +void hw_atl_rpo_outer_vlan_tag_mode_set(void *context, + u32 outervlantagmode) +{ + aq_hw_write_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR, + HW_ATL_RPO_OUTER_VL_INS_MODE_MSK, + HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT, + outervlantagmode); +} + +u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context) +{ + return aq_hw_read_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR, + HW_ATL_RPO_OUTER_VL_INS_MODE_MSK, + HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT); +} + void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw, u32 tcp_udp_crc_offload_en) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index 34b42ce43512..0c37abbabca5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -488,6 +488,11 @@ void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw, u32 rx_desc_vlan_stripping, u32 descriptor); +void hw_atl_rpo_outer_vlan_tag_mode_set(void *context, + u32 outervlantagmode); + +u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context); + /* set tcp/udp checksum offload enable */ void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw, u32 tcp_udp_crc_offload_en); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index fc1446f737bb..c3febcdfa92e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -1383,6 +1383,24 @@ /* default value of bitfield l4_chk_en */ #define HW_ATL_RPOL4CHK_EN_DEFAULT 0x0 +/* RX outer_vl_ins_mode Bitfield Definitions + * Preprocessor definitions for the bitfield "outer_vl_ins_mode". 
+ * PORT="pif_rpo_outer_vl_mode_i" + */ + +/* Register address for bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_ADR 0x00005580 +/* Bitmask for bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSK 0x00000004 +/* Inverted bitmask for bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSKN 0xFFFFFFFB +/* Lower bit position of bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT 2 +/* Width of bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_WIDTH 1 +/* Default value of bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_DEFAULT 0x0 + /* rx reg_res_dsbl bitfield definitions * preprocessor definitions for the bitfield "reg_res_dsbl". * port="pif_rx_reg_res_dsbl_i" diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h index 23374bffa92b..597654b51e01 100644 --- a/drivers/net/ethernet/aquantia/atlantic/ver.h +++ b/drivers/net/ethernet/aquantia/atlantic/ver.h @@ -7,11 +7,6 @@ #ifndef VER_H #define VER_H -#define NIC_MAJOR_DRIVER_VERSION 2 -#define NIC_MINOR_DRIVER_VERSION 0 -#define NIC_BUILD_DRIVER_VERSION 4 -#define NIC_REVISION_DRIVER_VERSION 0 - #define AQ_CFG_DRV_VERSION_SUFFIX "-kern" #endif /* VER_H */ diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig index 953ff1f9ac70..0058051ba925 100644 --- a/drivers/net/ethernet/atheros/Kconfig +++ b/drivers/net/ethernet/atheros/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_ATHEROS bool "Atheros devices" default y - depends on PCI + depends on (PCI || ATH79) ---help--- If you have a network (Ethernet) card belonging to this class, say Y. @@ -17,6 +17,14 @@ config NET_VENDOR_ATHEROS if NET_VENDOR_ATHEROS +config AG71XX + tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support" + depends on ATH79 + select PHYLIB + help + If you wish to compile a kernel for AR7XXX/91XXX and enable + ethernet support, then you should always answer Y to this. + config ATL2 tristate "Atheros L2 Fast Ethernet support" depends on PCI diff --git a/drivers/net/ethernet/atheros/Makefile b/drivers/net/ethernet/atheros/Makefile index aa3d394b87e6..aca696cb6425 100644 --- a/drivers/net/ethernet/atheros/Makefile +++ b/drivers/net/ethernet/atheros/Makefile @@ -3,6 +3,7 @@ # Makefile for the Atheros network device drivers. # +obj-$(CONFIG_AG71XX) += ag71xx.o obj-$(CONFIG_ATL1) += atlx/ obj-$(CONFIG_ATL2) += atlx/ obj-$(CONFIG_ATL1E) += atl1e/ diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c new file mode 100644 index 000000000000..72a57c6cd254 --- /dev/null +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -0,0 +1,1898 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Atheros AR71xx built-in ethernet mac driver + * + * Copyright (C) 2019 Oleksij Rempel <o.rempel@pengutronix.de> + * + * List of authors contributed to this driver before mainlining: + * Alexander Couzens <lynxis@fe80.eu> + * Christian Lamparter <chunkeey@gmail.com> + * Chuanhong Guo <gch981213@gmail.com> + * Daniel F. 
Dickinson <cshored@thecshore.com> + * David Bauer <mail@david-bauer.net> + * Felix Fietkau <nbd@nbd.name> + * Gabor Juhos <juhosg@freemail.hu> + * Hauke Mehrtens <hauke@hauke-m.de> + * Johann Neuhauser <johann@it-neuhauser.de> + * John Crispin <john@phrozen.org> + * Jo-Philipp Wich <jo@mein.io> + * Koen Vandeputte <koen.vandeputte@ncentric.com> + * Lucian Cristian <lucian.cristian@gmail.com> + * Matt Merhar <mattmerhar@protonmail.com> + * Milan Krstic <milan.krstic@gmail.com> + * Petr Štetiar <ynezz@true.cz> + * Rosen Penev <rosenp@gmail.com> + * Stephen Walker <stephendwalker+github@gmail.com> + * Vittorio Gambaletta <openwrt@vittgam.net> + * Weijie Gao <hackpascal@gmail.com> + * Imre Kaloz <kaloz@openwrt.org> + */ + +#include <linux/if_vlan.h> +#include <linux/mfd/syscon.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/of_platform.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/clk.h> + +/* For our NAPI weight bigger does *NOT* mean better - it means more + * D-cache misses and lots more wasted cycles than we'll ever + * possibly gain from saving instructions. + */ +#define AG71XX_NAPI_WEIGHT 32 +#define AG71XX_OOM_REFILL (1 + HZ / 10) + +#define AG71XX_INT_ERR (AG71XX_INT_RX_BE | AG71XX_INT_TX_BE) +#define AG71XX_INT_TX (AG71XX_INT_TX_PS) +#define AG71XX_INT_RX (AG71XX_INT_RX_PR | AG71XX_INT_RX_OF) + +#define AG71XX_INT_POLL (AG71XX_INT_RX | AG71XX_INT_TX) +#define AG71XX_INT_INIT (AG71XX_INT_ERR | AG71XX_INT_POLL) + +#define AG71XX_TX_MTU_LEN 1540 + +#define AG71XX_TX_RING_SPLIT 512 +#define AG71XX_TX_RING_DS_PER_PKT DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \ + AG71XX_TX_RING_SPLIT) +#define AG71XX_TX_RING_SIZE_DEFAULT 128 +#define AG71XX_RX_RING_SIZE_DEFAULT 256 + +#define AG71XX_MDIO_RETRY 1000 +#define AG71XX_MDIO_DELAY 5 +#define AG71XX_MDIO_MAX_CLK 5000000 + +/* Register offsets */ +#define AG71XX_REG_MAC_CFG1 0x0000 +#define MAC_CFG1_TXE BIT(0) /* Tx Enable */ +#define MAC_CFG1_STX BIT(1) /* Synchronize Tx Enable */ +#define MAC_CFG1_RXE BIT(2) /* Rx Enable */ +#define MAC_CFG1_SRX BIT(3) /* Synchronize Rx Enable */ +#define MAC_CFG1_TFC BIT(4) /* Tx Flow Control Enable */ +#define MAC_CFG1_RFC BIT(5) /* Rx Flow Control Enable */ +#define MAC_CFG1_SR BIT(31) /* Soft Reset */ +#define MAC_CFG1_INIT (MAC_CFG1_RXE | MAC_CFG1_TXE | \ + MAC_CFG1_SRX | MAC_CFG1_STX) + +#define AG71XX_REG_MAC_CFG2 0x0004 +#define MAC_CFG2_FDX BIT(0) +#define MAC_CFG2_PAD_CRC_EN BIT(2) +#define MAC_CFG2_LEN_CHECK BIT(4) +#define MAC_CFG2_IF_1000 BIT(9) +#define MAC_CFG2_IF_10_100 BIT(8) + +#define AG71XX_REG_MAC_MFL 0x0010 + +#define AG71XX_REG_MII_CFG 0x0020 +#define MII_CFG_CLK_DIV_4 0 +#define MII_CFG_CLK_DIV_6 2 +#define MII_CFG_CLK_DIV_8 3 +#define MII_CFG_CLK_DIV_10 4 +#define MII_CFG_CLK_DIV_14 5 +#define MII_CFG_CLK_DIV_20 6 +#define MII_CFG_CLK_DIV_28 7 +#define MII_CFG_CLK_DIV_34 8 +#define MII_CFG_CLK_DIV_42 9 +#define MII_CFG_CLK_DIV_50 10 +#define MII_CFG_CLK_DIV_58 11 +#define MII_CFG_CLK_DIV_66 12 +#define MII_CFG_CLK_DIV_74 13 +#define MII_CFG_CLK_DIV_82 14 +#define MII_CFG_CLK_DIV_98 15 +#define MII_CFG_RESET BIT(31) + +#define AG71XX_REG_MII_CMD 0x0024 +#define MII_CMD_READ BIT(0) + +#define AG71XX_REG_MII_ADDR 0x0028 +#define MII_ADDR_SHIFT 8 + +#define AG71XX_REG_MII_CTRL 0x002c +#define AG71XX_REG_MII_STATUS 0x0030 +#define AG71XX_REG_MII_IND 0x0034 +#define MII_IND_BUSY BIT(0) +#define MII_IND_INVALID BIT(2) + +#define AG71XX_REG_MAC_IFCTL 0x0038 +#define MAC_IFCTL_SPEED BIT(16) + +#define AG71XX_REG_MAC_ADDR1 0x0040 +#define
AG71XX_REG_MAC_ADDR2 0x0044 +#define AG71XX_REG_FIFO_CFG0 0x0048 +#define FIFO_CFG0_WTM BIT(0) /* Watermark Module */ +#define FIFO_CFG0_RXS BIT(1) /* Rx System Module */ +#define FIFO_CFG0_RXF BIT(2) /* Rx Fabric Module */ +#define FIFO_CFG0_TXS BIT(3) /* Tx System Module */ +#define FIFO_CFG0_TXF BIT(4) /* Tx Fabric Module */ +#define FIFO_CFG0_ALL (FIFO_CFG0_WTM | FIFO_CFG0_RXS | FIFO_CFG0_RXF \ + | FIFO_CFG0_TXS | FIFO_CFG0_TXF) +#define FIFO_CFG0_INIT (FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT) + +#define FIFO_CFG0_ENABLE_SHIFT 8 + +#define AG71XX_REG_FIFO_CFG1 0x004c +#define AG71XX_REG_FIFO_CFG2 0x0050 +#define AG71XX_REG_FIFO_CFG3 0x0054 +#define AG71XX_REG_FIFO_CFG4 0x0058 +#define FIFO_CFG4_DE BIT(0) /* Drop Event */ +#define FIFO_CFG4_DV BIT(1) /* RX_DV Event */ +#define FIFO_CFG4_FC BIT(2) /* False Carrier */ +#define FIFO_CFG4_CE BIT(3) /* Code Error */ +#define FIFO_CFG4_CR BIT(4) /* CRC error */ +#define FIFO_CFG4_LM BIT(5) /* Length Mismatch */ +#define FIFO_CFG4_LO BIT(6) /* Length out of range */ +#define FIFO_CFG4_OK BIT(7) /* Packet is OK */ +#define FIFO_CFG4_MC BIT(8) /* Multicast Packet */ +#define FIFO_CFG4_BC BIT(9) /* Broadcast Packet */ +#define FIFO_CFG4_DR BIT(10) /* Dribble */ +#define FIFO_CFG4_LE BIT(11) /* Long Event */ +#define FIFO_CFG4_CF BIT(12) /* Control Frame */ +#define FIFO_CFG4_PF BIT(13) /* Pause Frame */ +#define FIFO_CFG4_UO BIT(14) /* Unsupported Opcode */ +#define FIFO_CFG4_VT BIT(15) /* VLAN tag detected */ +#define FIFO_CFG4_FT BIT(16) /* Frame Truncated */ +#define FIFO_CFG4_UC BIT(17) /* Unicast Packet */ +#define FIFO_CFG4_INIT (FIFO_CFG4_DE | FIFO_CFG4_DV | FIFO_CFG4_FC | \ + FIFO_CFG4_CE | FIFO_CFG4_CR | FIFO_CFG4_LM | \ + FIFO_CFG4_LO | FIFO_CFG4_OK | FIFO_CFG4_MC | \ + FIFO_CFG4_BC | FIFO_CFG4_DR | FIFO_CFG4_LE | \ + FIFO_CFG4_CF | FIFO_CFG4_PF | FIFO_CFG4_UO | \ + FIFO_CFG4_VT) + +#define AG71XX_REG_FIFO_CFG5 0x005c +#define FIFO_CFG5_DE BIT(0) /* Drop Event */ +#define FIFO_CFG5_DV BIT(1) /* RX_DV Event */ +#define FIFO_CFG5_FC BIT(2) /* False Carrier */ +#define FIFO_CFG5_CE BIT(3) /* Code Error */ +#define FIFO_CFG5_LM BIT(4) /* Length Mismatch */ +#define FIFO_CFG5_LO BIT(5) /* Length Out of Range */ +#define FIFO_CFG5_OK BIT(6) /* Packet is OK */ +#define FIFO_CFG5_MC BIT(7) /* Multicast Packet */ +#define FIFO_CFG5_BC BIT(8) /* Broadcast Packet */ +#define FIFO_CFG5_DR BIT(9) /* Dribble */ +#define FIFO_CFG5_CF BIT(10) /* Control Frame */ +#define FIFO_CFG5_PF BIT(11) /* Pause Frame */ +#define FIFO_CFG5_UO BIT(12) /* Unsupported Opcode */ +#define FIFO_CFG5_VT BIT(13) /* VLAN tag detected */ +#define FIFO_CFG5_LE BIT(14) /* Long Event */ +#define FIFO_CFG5_FT BIT(15) /* Frame Truncated */ +#define FIFO_CFG5_16 BIT(16) /* unknown */ +#define FIFO_CFG5_17 BIT(17) /* unknown */ +#define FIFO_CFG5_SF BIT(18) /* Short Frame */ +#define FIFO_CFG5_BM BIT(19) /* Byte Mode */ +#define FIFO_CFG5_INIT (FIFO_CFG5_DE | FIFO_CFG5_DV | FIFO_CFG5_FC | \ + FIFO_CFG5_CE | FIFO_CFG5_LO | FIFO_CFG5_OK | \ + FIFO_CFG5_MC | FIFO_CFG5_BC | FIFO_CFG5_DR | \ + FIFO_CFG5_CF | FIFO_CFG5_PF | FIFO_CFG5_VT | \ + FIFO_CFG5_LE | FIFO_CFG5_FT | FIFO_CFG5_16 | \ + FIFO_CFG5_17 | FIFO_CFG5_SF) + +#define AG71XX_REG_TX_CTRL 0x0180 +#define TX_CTRL_TXE BIT(0) /* Tx Enable */ + +#define AG71XX_REG_TX_DESC 0x0184 +#define AG71XX_REG_TX_STATUS 0x0188 +#define TX_STATUS_PS BIT(0) /* Packet Sent */ +#define TX_STATUS_UR BIT(1) /* Tx Underrun */ +#define TX_STATUS_BE BIT(3) /* Bus Error */ + +#define AG71XX_REG_RX_CTRL 0x018c +#define RX_CTRL_RXE BIT(0) /* Rx Enable */ 
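[Editor's note: a quick sanity check of the FIFO_CFG0 encoding above — the five module-enable flags occupy bits 4:0 and are mirrored into the enable field at bit 8, so FIFO_CFG0_INIT comes out as 0x1f00. A throwaway userspace check, with BIT() expanded by hand:

#include <assert.h>

#define FIFO_CFG0_ALL		0x1fu	/* WTM | RXS | RXF | TXS | TXF */
#define FIFO_CFG0_ENABLE_SHIFT	8
#define FIFO_CFG0_INIT		(FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT)

int main(void)
{
	assert(FIFO_CFG0_INIT == 0x1f00u);	/* enables land in bits 12:8 */
	return 0;
}
]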
+ +#define AG71XX_DMA_RETRY 10 +#define AG71XX_DMA_DELAY 1 + +#define AG71XX_REG_RX_DESC 0x0190 +#define AG71XX_REG_RX_STATUS 0x0194 +#define RX_STATUS_PR BIT(0) /* Packet Received */ +#define RX_STATUS_OF BIT(2) /* Rx Overflow */ +#define RX_STATUS_BE BIT(3) /* Bus Error */ + +#define AG71XX_REG_INT_ENABLE 0x0198 +#define AG71XX_REG_INT_STATUS 0x019c +#define AG71XX_INT_TX_PS BIT(0) +#define AG71XX_INT_TX_UR BIT(1) +#define AG71XX_INT_TX_BE BIT(3) +#define AG71XX_INT_RX_PR BIT(4) +#define AG71XX_INT_RX_OF BIT(6) +#define AG71XX_INT_RX_BE BIT(7) + +#define AG71XX_REG_FIFO_DEPTH 0x01a8 +#define AG71XX_REG_RX_SM 0x01b0 +#define AG71XX_REG_TX_SM 0x01b4 + +#define ETH_SWITCH_HEADER_LEN 2 + +#define AG71XX_DEFAULT_MSG_ENABLE \ + (NETIF_MSG_DRV \ + | NETIF_MSG_PROBE \ + | NETIF_MSG_LINK \ + | NETIF_MSG_TIMER \ + | NETIF_MSG_IFDOWN \ + | NETIF_MSG_IFUP \ + | NETIF_MSG_RX_ERR \ + | NETIF_MSG_TX_ERR) + +#define DESC_EMPTY BIT(31) +#define DESC_MORE BIT(24) +#define DESC_PKTLEN_M 0xfff +struct ag71xx_desc { + u32 data; + u32 ctrl; + u32 next; + u32 pad; +} __aligned(4); + +#define AG71XX_DESC_SIZE roundup(sizeof(struct ag71xx_desc), \ + L1_CACHE_BYTES) + +struct ag71xx_buf { + union { + struct { + struct sk_buff *skb; + unsigned int len; + } tx; + struct { + dma_addr_t dma_addr; + void *rx_buf; + } rx; + }; +}; + +struct ag71xx_ring { + /* "Hot" fields in the data path. */ + unsigned int curr; + unsigned int dirty; + + /* "Cold" fields - not used in the data path. */ + struct ag71xx_buf *buf; + u16 order; + u16 desc_split; + dma_addr_t descs_dma; + u8 *descs_cpu; +}; + +enum ag71xx_type { + AR7100, + AR7240, + AR9130, + AR9330, + AR9340, + QCA9530, + QCA9550, +}; + +struct ag71xx_dcfg { + u32 max_frame_len; + const u32 *fifodata; + u16 desc_pktlen_mask; + bool tx_hang_workaround; + enum ag71xx_type type; +}; + +struct ag71xx { + /* Critical data related to the per-packet data path are clustered + * early in this structure to help improve the D-cache footprint. + */ + struct ag71xx_ring rx_ring ____cacheline_aligned; + struct ag71xx_ring tx_ring ____cacheline_aligned; + + u16 rx_buf_size; + u8 rx_buf_offset; + + struct net_device *ndev; + struct platform_device *pdev; + struct napi_struct napi; + u32 msg_enable; + const struct ag71xx_dcfg *dcfg; + + /* From this point onwards we're not looking at per-packet fields. 
*/ + void __iomem *mac_base; + + struct ag71xx_desc *stop_desc; + dma_addr_t stop_desc_dma; + + int phy_if_mode; + + struct delayed_work restart_work; + struct timer_list oom_timer; + + struct reset_control *mac_reset; + + u32 fifodata[3]; + int mac_idx; + + struct reset_control *mdio_reset; + struct mii_bus *mii_bus; + struct clk *clk_mdio; + struct clk *clk_eth; +}; + +static int ag71xx_desc_empty(struct ag71xx_desc *desc) +{ + return (desc->ctrl & DESC_EMPTY) != 0; +} + +static struct ag71xx_desc *ag71xx_ring_desc(struct ag71xx_ring *ring, int idx) +{ + return (struct ag71xx_desc *)&ring->descs_cpu[idx * AG71XX_DESC_SIZE]; +} + +static int ag71xx_ring_size_order(int size) +{ + return fls(size - 1); +} + +static bool ag71xx_is(struct ag71xx *ag, enum ag71xx_type type) +{ + return ag->dcfg->type == type; +} + +static void ag71xx_wr(struct ag71xx *ag, unsigned int reg, u32 value) +{ + iowrite32(value, ag->mac_base + reg); + /* flush write */ + (void)ioread32(ag->mac_base + reg); +} + +static u32 ag71xx_rr(struct ag71xx *ag, unsigned int reg) +{ + return ioread32(ag->mac_base + reg); +} + +static void ag71xx_sb(struct ag71xx *ag, unsigned int reg, u32 mask) +{ + void __iomem *r; + + r = ag->mac_base + reg; + iowrite32(ioread32(r) | mask, r); + /* flush write */ + (void)ioread32(r); +} + +static void ag71xx_cb(struct ag71xx *ag, unsigned int reg, u32 mask) +{ + void __iomem *r; + + r = ag->mac_base + reg; + iowrite32(ioread32(r) & ~mask, r); + /* flush write */ + (void)ioread32(r); +} + +static void ag71xx_int_enable(struct ag71xx *ag, u32 ints) +{ + ag71xx_sb(ag, AG71XX_REG_INT_ENABLE, ints); +} + +static void ag71xx_int_disable(struct ag71xx *ag, u32 ints) +{ + ag71xx_cb(ag, AG71XX_REG_INT_ENABLE, ints); +} + +static int ag71xx_mdio_wait_busy(struct ag71xx *ag) +{ + struct net_device *ndev = ag->ndev; + int i; + + for (i = 0; i < AG71XX_MDIO_RETRY; i++) { + u32 busy; + + udelay(AG71XX_MDIO_DELAY); + + busy = ag71xx_rr(ag, AG71XX_REG_MII_IND); + if (!busy) + return 0; + + udelay(AG71XX_MDIO_DELAY); + } + + netif_err(ag, link, ndev, "MDIO operation timed out\n"); + + return -ETIMEDOUT; +} + +static int ag71xx_mdio_mii_read(struct mii_bus *bus, int addr, int reg) +{ + struct ag71xx *ag = bus->priv; + int err, val; + + err = ag71xx_mdio_wait_busy(ag); + if (err) + return err; + + ag71xx_wr(ag, AG71XX_REG_MII_ADDR, + ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff)); + /* enable read mode */ + ag71xx_wr(ag, AG71XX_REG_MII_CMD, MII_CMD_READ); + + err = ag71xx_mdio_wait_busy(ag); + if (err) + return err; + + val = ag71xx_rr(ag, AG71XX_REG_MII_STATUS); + /* disable read mode */ + ag71xx_wr(ag, AG71XX_REG_MII_CMD, 0); + + netif_dbg(ag, link, ag->ndev, "mii_read: addr=%04x, reg=%04x, value=%04x\n", + addr, reg, val); + + return val; +} + +static int ag71xx_mdio_mii_write(struct mii_bus *bus, int addr, int reg, + u16 val) +{ + struct ag71xx *ag = bus->priv; + + netif_dbg(ag, link, ag->ndev, "mii_write: addr=%04x, reg=%04x, value=%04x\n", + addr, reg, val); + + ag71xx_wr(ag, AG71XX_REG_MII_ADDR, + ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff)); + ag71xx_wr(ag, AG71XX_REG_MII_CTRL, val); + + return ag71xx_mdio_wait_busy(ag); +} + +static const u32 ar71xx_mdio_div_table[] = { + 4, 4, 6, 8, 10, 14, 20, 28, +}; + +static const u32 ar7240_mdio_div_table[] = { + 2, 2, 4, 6, 8, 12, 18, 26, 32, 40, 48, 56, 62, 70, 78, 96, +}; + +static const u32 ar933x_mdio_div_table[] = { + 4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98, +}; + +static int ag71xx_mdio_get_divider(struct ag71xx *ag, u32 *div) +{ 
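+	/* Walk the SoC-specific divider table in ascending order and pick
+	 * the first divider that keeps the resulting MDC frequency at or
+	 * below AG71XX_MDIO_MAX_CLK (5 MHz).
+	 */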
+ unsigned long ref_clock; + const u32 *table; + int ndivs, i; + + ref_clock = clk_get_rate(ag->clk_mdio); + if (!ref_clock) + return -EINVAL; + + if (ag71xx_is(ag, AR9330) || ag71xx_is(ag, AR9340)) { + table = ar933x_mdio_div_table; + ndivs = ARRAY_SIZE(ar933x_mdio_div_table); + } else if (ag71xx_is(ag, AR7240)) { + table = ar7240_mdio_div_table; + ndivs = ARRAY_SIZE(ar7240_mdio_div_table); + } else { + table = ar71xx_mdio_div_table; + ndivs = ARRAY_SIZE(ar71xx_mdio_div_table); + } + + for (i = 0; i < ndivs; i++) { + unsigned long t; + + t = ref_clock / table[i]; + if (t <= AG71XX_MDIO_MAX_CLK) { + *div = i; + return 0; + } + } + + return -ENOENT; +} + +static int ag71xx_mdio_reset(struct mii_bus *bus) +{ + struct ag71xx *ag = bus->priv; + int err; + u32 t; + + err = ag71xx_mdio_get_divider(ag, &t); + if (err) + return err; + + ag71xx_wr(ag, AG71XX_REG_MII_CFG, t | MII_CFG_RESET); + usleep_range(100, 200); + + ag71xx_wr(ag, AG71XX_REG_MII_CFG, t); + usleep_range(100, 200); + + return 0; +} + +static int ag71xx_mdio_probe(struct ag71xx *ag) +{ + struct device *dev = &ag->pdev->dev; + struct net_device *ndev = ag->ndev; + static struct mii_bus *mii_bus; + struct device_node *np; + int err; + + np = dev->of_node; + ag->mii_bus = NULL; + + ag->clk_mdio = devm_clk_get(dev, "mdio"); + if (IS_ERR(ag->clk_mdio)) { + netif_err(ag, probe, ndev, "Failed to get mdio clk.\n"); + return PTR_ERR(ag->clk_mdio); + } + + err = clk_prepare_enable(ag->clk_mdio); + if (err) { + netif_err(ag, probe, ndev, "Failed to enable mdio clk.\n"); + return err; + } + + mii_bus = devm_mdiobus_alloc(dev); + if (!mii_bus) { + err = -ENOMEM; + goto mdio_err_put_clk; + } + + ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio"); + if (IS_ERR(ag->mdio_reset)) { + netif_err(ag, probe, ndev, "Failed to get reset mdio.\n"); + return PTR_ERR(ag->mdio_reset); + } + + mii_bus->name = "ag71xx_mdio"; + mii_bus->read = ag71xx_mdio_mii_read; + mii_bus->write = ag71xx_mdio_mii_write; + mii_bus->reset = ag71xx_mdio_reset; + mii_bus->priv = ag; + mii_bus->parent = dev; + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s.%d", np->name, ag->mac_idx); + + if (!IS_ERR(ag->mdio_reset)) { + reset_control_assert(ag->mdio_reset); + msleep(100); + reset_control_deassert(ag->mdio_reset); + msleep(200); + } + + err = of_mdiobus_register(mii_bus, np); + if (err) + goto mdio_err_put_clk; + + ag->mii_bus = mii_bus; + + return 0; + +mdio_err_put_clk: + clk_disable_unprepare(ag->clk_mdio); + return err; +} + +static void ag71xx_mdio_remove(struct ag71xx *ag) +{ + if (ag->mii_bus) + mdiobus_unregister(ag->mii_bus); + clk_disable_unprepare(ag->clk_mdio); +} + +static void ag71xx_hw_stop(struct ag71xx *ag) +{ + /* disable all interrupts and stop the rx/tx engine */ + ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, 0); + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0); + ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0); +} + +static bool ag71xx_check_dma_stuck(struct ag71xx *ag) +{ + unsigned long timestamp; + u32 rx_sm, tx_sm, rx_fd; + + timestamp = netdev_get_tx_queue(ag->ndev, 0)->trans_start; + if (likely(time_before(jiffies, timestamp + HZ / 10))) + return false; + + if (!netif_carrier_ok(ag->ndev)) + return false; + + rx_sm = ag71xx_rr(ag, AG71XX_REG_RX_SM); + if ((rx_sm & 0x7) == 0x3 && ((rx_sm >> 4) & 0x7) == 0x6) + return true; + + tx_sm = ag71xx_rr(ag, AG71XX_REG_TX_SM); + rx_fd = ag71xx_rr(ag, AG71XX_REG_FIFO_DEPTH); + if (((tx_sm >> 4) & 0x7) == 0 && ((rx_sm & 0x7) == 0) && + ((rx_sm >> 4) & 0x7) == 0 && rx_fd == 0) + return true; + + return false; +} + +static int 
ag71xx_tx_packets(struct ag71xx *ag, bool flush) +{ + struct ag71xx_ring *ring = &ag->tx_ring; + int sent = 0, bytes_compl = 0, n = 0; + struct net_device *ndev = ag->ndev; + int ring_mask, ring_size; + bool dma_stuck = false; + + ring_mask = BIT(ring->order) - 1; + ring_size = BIT(ring->order); + + netif_dbg(ag, tx_queued, ndev, "processing TX ring\n"); + + while (ring->dirty + n != ring->curr) { + struct ag71xx_desc *desc; + struct sk_buff *skb; + unsigned int i; + + i = (ring->dirty + n) & ring_mask; + desc = ag71xx_ring_desc(ring, i); + skb = ring->buf[i].tx.skb; + + if (!flush && !ag71xx_desc_empty(desc)) { + if (ag->dcfg->tx_hang_workaround && + ag71xx_check_dma_stuck(ag)) { + schedule_delayed_work(&ag->restart_work, + HZ / 2); + dma_stuck = true; + } + break; + } + + if (flush) + desc->ctrl |= DESC_EMPTY; + + n++; + if (!skb) + continue; + + dev_kfree_skb_any(skb); + ring->buf[i].tx.skb = NULL; + + bytes_compl += ring->buf[i].tx.len; + + sent++; + ring->dirty += n; + + while (n > 0) { + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS); + n--; + } + } + + netif_dbg(ag, tx_done, ndev, "%d packets sent out\n", sent); + + if (!sent) + return 0; + + ag->ndev->stats.tx_bytes += bytes_compl; + ag->ndev->stats.tx_packets += sent; + + netdev_completed_queue(ag->ndev, sent, bytes_compl); + if ((ring->curr - ring->dirty) < (ring_size * 3) / 4) + netif_wake_queue(ag->ndev); + + if (!dma_stuck) + cancel_delayed_work(&ag->restart_work); + + return sent; +} + +static void ag71xx_dma_wait_stop(struct ag71xx *ag) +{ + struct net_device *ndev = ag->ndev; + int i; + + for (i = 0; i < AG71XX_DMA_RETRY; i++) { + u32 rx, tx; + + mdelay(AG71XX_DMA_DELAY); + + rx = ag71xx_rr(ag, AG71XX_REG_RX_CTRL) & RX_CTRL_RXE; + tx = ag71xx_rr(ag, AG71XX_REG_TX_CTRL) & TX_CTRL_TXE; + if (!rx && !tx) + return; + } + + netif_err(ag, hw, ndev, "DMA stop operation timed out\n"); +} + +static void ag71xx_dma_reset(struct ag71xx *ag) +{ + struct net_device *ndev = ag->ndev; + u32 val; + int i; + + /* stop RX and TX */ + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0); + ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0); + + /* give the hardware some time to really stop all rx/tx activity + * clearing the descriptors too early causes random memory corruption + */ + ag71xx_dma_wait_stop(ag); + + /* clear descriptor addresses */ + ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->stop_desc_dma); + ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->stop_desc_dma); + + /* clear pending RX/TX interrupts */ + for (i = 0; i < 256; i++) { + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR); + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS); + } + + /* clear pending errors */ + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE | RX_STATUS_OF); + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE | TX_STATUS_UR); + + val = ag71xx_rr(ag, AG71XX_REG_RX_STATUS); + if (val) + netif_err(ag, hw, ndev, "unable to clear DMA Rx status: %08x\n", + val); + + val = ag71xx_rr(ag, AG71XX_REG_TX_STATUS); + + /* mask out reserved bits */ + val &= ~0xff000000; + + if (val) + netif_err(ag, hw, ndev, "unable to clear DMA Tx status: %08x\n", + val); +} + +static void ag71xx_hw_setup(struct ag71xx *ag) +{ + u32 init = MAC_CFG1_INIT; + + /* setup MAC configuration registers */ + ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, init); + + ag71xx_sb(ag, AG71XX_REG_MAC_CFG2, + MAC_CFG2_PAD_CRC_EN | MAC_CFG2_LEN_CHECK); + + /* setup max frame length to zero */ + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, 0); + + /* setup FIFO configuration registers */ + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG0, FIFO_CFG0_INIT); + ag71xx_wr(ag, 
AG71XX_REG_FIFO_CFG1, ag->fifodata[0]); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG2, ag->fifodata[1]); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG4, FIFO_CFG4_INIT); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, FIFO_CFG5_INIT); +} + +static unsigned int ag71xx_max_frame_len(unsigned int mtu) +{ + return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; +} + +static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac) +{ + u32 t; + + t = (((u32)mac[5]) << 24) | (((u32)mac[4]) << 16) + | (((u32)mac[3]) << 8) | ((u32)mac[2]); + + ag71xx_wr(ag, AG71XX_REG_MAC_ADDR1, t); + + t = (((u32)mac[1]) << 24) | (((u32)mac[0]) << 16); + ag71xx_wr(ag, AG71XX_REG_MAC_ADDR2, t); +} + +static void ag71xx_fast_reset(struct ag71xx *ag) +{ + struct net_device *dev = ag->ndev; + u32 rx_ds; + u32 mii_reg; + + ag71xx_hw_stop(ag); + + mii_reg = ag71xx_rr(ag, AG71XX_REG_MII_CFG); + rx_ds = ag71xx_rr(ag, AG71XX_REG_RX_DESC); + + ag71xx_tx_packets(ag, true); + + reset_control_assert(ag->mac_reset); + usleep_range(10, 20); + reset_control_deassert(ag->mac_reset); + usleep_range(10, 20); + + ag71xx_dma_reset(ag); + ag71xx_hw_setup(ag); + ag->tx_ring.curr = 0; + ag->tx_ring.dirty = 0; + netdev_reset_queue(ag->ndev); + + /* setup max frame length */ + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, + ag71xx_max_frame_len(ag->ndev->mtu)); + + ag71xx_wr(ag, AG71XX_REG_RX_DESC, rx_ds); + ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma); + ag71xx_wr(ag, AG71XX_REG_MII_CFG, mii_reg); + + ag71xx_hw_set_macaddr(ag, dev->dev_addr); +} + +static void ag71xx_hw_start(struct ag71xx *ag) +{ + /* start RX engine */ + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE); + + /* enable interrupts */ + ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, AG71XX_INT_INIT); + + netif_wake_queue(ag->ndev); +} + +static void ag71xx_link_adjust(struct ag71xx *ag, bool update) +{ + struct phy_device *phydev = ag->ndev->phydev; + u32 cfg2; + u32 ifctl; + u32 fifo5; + + if (!phydev->link && update) { + ag71xx_hw_stop(ag); + return; + } + + if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130)) + ag71xx_fast_reset(ag); + + cfg2 = ag71xx_rr(ag, AG71XX_REG_MAC_CFG2); + cfg2 &= ~(MAC_CFG2_IF_1000 | MAC_CFG2_IF_10_100 | MAC_CFG2_FDX); + cfg2 |= (phydev->duplex) ? 
MAC_CFG2_FDX : 0; + + ifctl = ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL); + ifctl &= ~(MAC_IFCTL_SPEED); + + fifo5 = ag71xx_rr(ag, AG71XX_REG_FIFO_CFG5); + fifo5 &= ~FIFO_CFG5_BM; + + switch (phydev->speed) { + case SPEED_1000: + cfg2 |= MAC_CFG2_IF_1000; + fifo5 |= FIFO_CFG5_BM; + break; + case SPEED_100: + cfg2 |= MAC_CFG2_IF_10_100; + ifctl |= MAC_IFCTL_SPEED; + break; + case SPEED_10: + cfg2 |= MAC_CFG2_IF_10_100; + break; + default: + WARN(1, "not supported speed %i\n", phydev->speed); + return; + } + + if (ag->tx_ring.desc_split) { + ag->fifodata[2] &= 0xffff; + ag->fifodata[2] |= ((2048 - ag->tx_ring.desc_split) / 4) << 16; + } + + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, ag->fifodata[2]); + + ag71xx_wr(ag, AG71XX_REG_MAC_CFG2, cfg2); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, fifo5); + ag71xx_wr(ag, AG71XX_REG_MAC_IFCTL, ifctl); + + ag71xx_hw_start(ag); + + if (update) + phy_print_status(phydev); +} + +static void ag71xx_phy_link_adjust(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + + ag71xx_link_adjust(ag, true); +} + +static int ag71xx_phy_connect(struct ag71xx *ag) +{ + struct device_node *np = ag->pdev->dev.of_node; + struct net_device *ndev = ag->ndev; + struct device_node *phy_node; + struct phy_device *phydev; + int ret; + + if (of_phy_is_fixed_link(np)) { + ret = of_phy_register_fixed_link(np); + if (ret < 0) { + netif_err(ag, probe, ndev, "Failed to register fixed PHY link: %d\n", + ret); + return ret; + } + + phy_node = of_node_get(np); + } else { + phy_node = of_parse_phandle(np, "phy-handle", 0); + } + + if (!phy_node) { + netif_err(ag, probe, ndev, "Could not find valid phy node\n"); + return -ENODEV; + } + + phydev = of_phy_connect(ag->ndev, phy_node, ag71xx_phy_link_adjust, + 0, ag->phy_if_mode); + + of_node_put(phy_node); + + if (!phydev) { + netif_err(ag, probe, ndev, "Could not connect to PHY device\n"); + return -ENODEV; + } + + phy_attached_info(phydev); + + return 0; +} + +static void ag71xx_ring_tx_clean(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->tx_ring; + int ring_mask = BIT(ring->order) - 1; + u32 bytes_compl = 0, pkts_compl = 0; + struct net_device *ndev = ag->ndev; + + while (ring->curr != ring->dirty) { + struct ag71xx_desc *desc; + u32 i = ring->dirty & ring_mask; + + desc = ag71xx_ring_desc(ring, i); + if (!ag71xx_desc_empty(desc)) { + desc->ctrl = 0; + ndev->stats.tx_errors++; + } + + if (ring->buf[i].tx.skb) { + bytes_compl += ring->buf[i].tx.len; + pkts_compl++; + dev_kfree_skb_any(ring->buf[i].tx.skb); + } + ring->buf[i].tx.skb = NULL; + ring->dirty++; + } + + /* flush descriptors */ + wmb(); + + netdev_completed_queue(ndev, pkts_compl, bytes_compl); +} + +static void ag71xx_ring_tx_init(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->tx_ring; + int ring_size = BIT(ring->order); + int ring_mask = ring_size - 1; + int i; + + for (i = 0; i < ring_size; i++) { + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + + desc->next = (u32)(ring->descs_dma + + AG71XX_DESC_SIZE * ((i + 1) & ring_mask)); + + desc->ctrl = DESC_EMPTY; + ring->buf[i].tx.skb = NULL; + } + + /* flush descriptors */ + wmb(); + + ring->curr = 0; + ring->dirty = 0; + netdev_reset_queue(ag->ndev); +} + +static void ag71xx_ring_rx_clean(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + int ring_size = BIT(ring->order); + int i; + + if (!ring->buf) + return; + + for (i = 0; i < ring_size; i++) + if (ring->buf[i].rx.rx_buf) { + dma_unmap_single(&ag->pdev->dev, + ring->buf[i].rx.dma_addr, + ag->rx_buf_size, DMA_FROM_DEVICE); + 
skb_free_frag(ring->buf[i].rx.rx_buf); + } +} + +static int ag71xx_buffer_size(struct ag71xx *ag) +{ + return ag->rx_buf_size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +} + +static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf, + int offset, + void *(*alloc)(unsigned int size)) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + struct ag71xx_desc *desc; + void *data; + + desc = ag71xx_ring_desc(ring, buf - &ring->buf[0]); + + data = alloc(ag71xx_buffer_size(ag)); + if (!data) + return false; + + buf->rx.rx_buf = data; + buf->rx.dma_addr = dma_map_single(&ag->pdev->dev, data, ag->rx_buf_size, + DMA_FROM_DEVICE); + desc->data = (u32)buf->rx.dma_addr + offset; + return true; +} + +static int ag71xx_ring_rx_init(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + struct net_device *ndev = ag->ndev; + int ring_mask = BIT(ring->order) - 1; + int ring_size = BIT(ring->order); + unsigned int i; + int ret; + + ret = 0; + for (i = 0; i < ring_size; i++) { + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + + desc->next = (u32)(ring->descs_dma + + AG71XX_DESC_SIZE * ((i + 1) & ring_mask)); + + netif_dbg(ag, rx_status, ndev, "RX desc at %p, next is %08x\n", + desc, desc->next); + } + + for (i = 0; i < ring_size; i++) { + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + + if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], ag->rx_buf_offset, + netdev_alloc_frag)) { + ret = -ENOMEM; + break; + } + + desc->ctrl = DESC_EMPTY; + } + + /* flush descriptors */ + wmb(); + + ring->curr = 0; + ring->dirty = 0; + + return ret; +} + +static int ag71xx_ring_rx_refill(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + int ring_mask = BIT(ring->order) - 1; + int offset = ag->rx_buf_offset; + unsigned int count; + + count = 0; + for (; ring->curr - ring->dirty > 0; ring->dirty++) { + struct ag71xx_desc *desc; + unsigned int i; + + i = ring->dirty & ring_mask; + desc = ag71xx_ring_desc(ring, i); + + if (!ring->buf[i].rx.rx_buf && + !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset, + napi_alloc_frag)) + break; + + desc->ctrl = DESC_EMPTY; + count++; + } + + /* flush descriptors */ + wmb(); + + netif_dbg(ag, rx_status, ag->ndev, "%u rx descriptors refilled\n", + count); + + return count; +} + +static int ag71xx_rings_init(struct ag71xx *ag) +{ + struct ag71xx_ring *tx = &ag->tx_ring; + struct ag71xx_ring *rx = &ag->rx_ring; + int ring_size, tx_size; + + ring_size = BIT(tx->order) + BIT(rx->order); + tx_size = BIT(tx->order); + + tx->buf = kcalloc(ring_size, sizeof(*tx->buf), GFP_KERNEL); + if (!tx->buf) + return -ENOMEM; + + tx->descs_cpu = dma_alloc_coherent(&ag->pdev->dev, + ring_size * AG71XX_DESC_SIZE, + &tx->descs_dma, GFP_ATOMIC); + if (!tx->descs_cpu) { + kfree(tx->buf); + tx->buf = NULL; + return -ENOMEM; + } + + rx->buf = &tx->buf[BIT(tx->order)]; + rx->descs_cpu = ((void *)tx->descs_cpu) + tx_size * AG71XX_DESC_SIZE; + rx->descs_dma = tx->descs_dma + tx_size * AG71XX_DESC_SIZE; + + ag71xx_ring_tx_init(ag); + return ag71xx_ring_rx_init(ag); +} + +static void ag71xx_rings_free(struct ag71xx *ag) +{ + struct ag71xx_ring *tx = &ag->tx_ring; + struct ag71xx_ring *rx = &ag->rx_ring; + int ring_size; + + ring_size = BIT(tx->order) + BIT(rx->order); + + if (tx->descs_cpu) + dma_free_coherent(&ag->pdev->dev, ring_size * AG71XX_DESC_SIZE, + tx->descs_cpu, tx->descs_dma); + + kfree(tx->buf); + + tx->descs_cpu = NULL; + rx->descs_cpu = NULL; + tx->buf = NULL; + rx->buf = NULL; +} + +static void ag71xx_rings_cleanup(struct ag71xx *ag) +{ + ag71xx_ring_rx_clean(ag); 
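+	/* tx and rx share the buf array and the coherent descriptor block
+	 * allocated in ag71xx_rings_init(), so drain both rings before
+	 * ag71xx_rings_free() releases that memory
+	 */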
+ ag71xx_ring_tx_clean(ag); + ag71xx_rings_free(ag); + + netdev_reset_queue(ag->ndev); +} + +static void ag71xx_hw_init(struct ag71xx *ag) +{ + ag71xx_hw_stop(ag); + + ag71xx_sb(ag, AG71XX_REG_MAC_CFG1, MAC_CFG1_SR); + usleep_range(20, 30); + + reset_control_assert(ag->mac_reset); + msleep(100); + reset_control_deassert(ag->mac_reset); + msleep(200); + + ag71xx_hw_setup(ag); + + ag71xx_dma_reset(ag); +} + +static int ag71xx_hw_enable(struct ag71xx *ag) +{ + int ret; + + ret = ag71xx_rings_init(ag); + if (ret) + return ret; + + napi_enable(&ag->napi); + ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma); + ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma); + netif_start_queue(ag->ndev); + + return 0; +} + +static void ag71xx_hw_disable(struct ag71xx *ag) +{ + netif_stop_queue(ag->ndev); + + ag71xx_hw_stop(ag); + ag71xx_dma_reset(ag); + + napi_disable(&ag->napi); + del_timer_sync(&ag->oom_timer); + + ag71xx_rings_cleanup(ag); +} + +static int ag71xx_open(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + unsigned int max_frame_len; + int ret; + + max_frame_len = ag71xx_max_frame_len(ndev->mtu); + ag->rx_buf_size = + SKB_DATA_ALIGN(max_frame_len + NET_SKB_PAD + NET_IP_ALIGN); + + /* setup max frame length */ + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len); + ag71xx_hw_set_macaddr(ag, ndev->dev_addr); + + ret = ag71xx_hw_enable(ag); + if (ret) + goto err; + + ret = ag71xx_phy_connect(ag); + if (ret) + goto err; + + phy_start(ndev->phydev); + + return 0; + +err: + ag71xx_rings_cleanup(ag); + return ret; +} + +static int ag71xx_stop(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); + ag71xx_hw_disable(ag); + + return 0; +} + +static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len) +{ + int i, ring_mask, ndesc, split; + struct ag71xx_desc *desc; + + ring_mask = BIT(ring->order) - 1; + ndesc = 0; + split = ring->desc_split; + + if (!split) + split = len; + + while (len > 0) { + unsigned int cur_len = len; + + i = (ring->curr + ndesc) & ring_mask; + desc = ag71xx_ring_desc(ring, i); + + if (!ag71xx_desc_empty(desc)) + return -1; + + if (cur_len > split) { + cur_len = split; + + /* TX will hang if DMA transfers <= 4 bytes, + * make sure next segment is more than 4 bytes long. 
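+			 * e.g. with a 512 byte split, a 514 byte remainder
+			 * is cut at 508 so the tail segment becomes 6 bytes
+			 * instead of 2.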
+ */ + if (len <= split + 4) + cur_len -= 4; + } + + desc->data = addr; + addr += cur_len; + len -= cur_len; + + if (len > 0) + cur_len |= DESC_MORE; + + /* prevent early tx attempt of this descriptor */ + if (!ndesc) + cur_len |= DESC_EMPTY; + + desc->ctrl = cur_len; + ndesc++; + } + + return ndesc; +} + +static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + int i, n, ring_min, ring_mask, ring_size; + struct ag71xx *ag = netdev_priv(ndev); + struct ag71xx_ring *ring; + struct ag71xx_desc *desc; + dma_addr_t dma_addr; + + ring = &ag->tx_ring; + ring_mask = BIT(ring->order) - 1; + ring_size = BIT(ring->order); + + if (skb->len <= 4) { + netif_dbg(ag, tx_err, ndev, "packet len is too small\n"); + goto err_drop; + } + + dma_addr = dma_map_single(&ag->pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + + i = ring->curr & ring_mask; + desc = ag71xx_ring_desc(ring, i); + + /* setup descriptor fields */ + n = ag71xx_fill_dma_desc(ring, (u32)dma_addr, + skb->len & ag->dcfg->desc_pktlen_mask); + if (n < 0) + goto err_drop_unmap; + + i = (ring->curr + n - 1) & ring_mask; + ring->buf[i].tx.len = skb->len; + ring->buf[i].tx.skb = skb; + + netdev_sent_queue(ndev, skb->len); + + skb_tx_timestamp(skb); + + desc->ctrl &= ~DESC_EMPTY; + ring->curr += n; + + /* flush descriptor */ + wmb(); + + ring_min = 2; + if (ring->desc_split) + ring_min *= AG71XX_TX_RING_DS_PER_PKT; + + if (ring->curr - ring->dirty >= ring_size - ring_min) { + netif_dbg(ag, tx_err, ndev, "tx queue full\n"); + netif_stop_queue(ndev); + } + + netif_dbg(ag, tx_queued, ndev, "packet injected into TX queue\n"); + + /* enable TX engine */ + ag71xx_wr(ag, AG71XX_REG_TX_CTRL, TX_CTRL_TXE); + + return NETDEV_TX_OK; + +err_drop_unmap: + dma_unmap_single(&ag->pdev->dev, dma_addr, skb->len, DMA_TO_DEVICE); + +err_drop: + ndev->stats.tx_dropped++; + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int ag71xx_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) +{ + if (!ndev->phydev) + return -EINVAL; + + return phy_mii_ioctl(ndev->phydev, ifr, cmd); +} + +static void ag71xx_oom_timer_handler(struct timer_list *t) +{ + struct ag71xx *ag = from_timer(ag, t, oom_timer); + + napi_schedule(&ag->napi); +} + +static void ag71xx_tx_timeout(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + + netif_err(ag, tx_err, ndev, "tx timeout\n"); + + schedule_delayed_work(&ag->restart_work, 1); +} + +static void ag71xx_restart_work_func(struct work_struct *work) +{ + struct ag71xx *ag = container_of(work, struct ag71xx, + restart_work.work); + struct net_device *ndev = ag->ndev; + + rtnl_lock(); + ag71xx_hw_disable(ag); + ag71xx_hw_enable(ag); + if (ndev->phydev->link) + ag71xx_link_adjust(ag, false); + rtnl_unlock(); +} + +static int ag71xx_rx_packets(struct ag71xx *ag, int limit) +{ + struct net_device *ndev = ag->ndev; + int ring_mask, ring_size, done = 0; + unsigned int pktlen_mask, offset; + struct sk_buff *next, *skb; + struct ag71xx_ring *ring; + struct list_head rx_list; + + ring = &ag->rx_ring; + pktlen_mask = ag->dcfg->desc_pktlen_mask; + offset = ag->rx_buf_offset; + ring_mask = BIT(ring->order) - 1; + ring_size = BIT(ring->order); + + netif_dbg(ag, rx_status, ndev, "rx packets, limit=%d, curr=%u, dirty=%u\n", + limit, ring->curr, ring->dirty); + + INIT_LIST_HEAD(&rx_list); + + while (done < limit) { + unsigned int i = ring->curr & ring_mask; + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + int pktlen; + int err = 0; + + if (ag71xx_desc_empty(desc)) + break; + + if 
((ring->dirty + ring_size) == ring->curr) { + WARN_ONCE(1, "RX out of ring"); + break; + } + + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR); + + pktlen = desc->ctrl & pktlen_mask; + pktlen -= ETH_FCS_LEN; + + dma_unmap_single(&ag->pdev->dev, ring->buf[i].rx.dma_addr, + ag->rx_buf_size, DMA_FROM_DEVICE); + + ndev->stats.rx_packets++; + ndev->stats.rx_bytes += pktlen; + + skb = build_skb(ring->buf[i].rx.rx_buf, ag71xx_buffer_size(ag)); + if (!skb) { + skb_free_frag(ring->buf[i].rx.rx_buf); + goto next; + } + + skb_reserve(skb, offset); + skb_put(skb, pktlen); + + if (err) { + ndev->stats.rx_dropped++; + kfree_skb(skb); + } else { + skb->dev = ndev; + skb->ip_summed = CHECKSUM_NONE; + list_add_tail(&skb->list, &rx_list); + } + +next: + ring->buf[i].rx.rx_buf = NULL; + done++; + + ring->curr++; + } + + ag71xx_ring_rx_refill(ag); + + list_for_each_entry_safe(skb, next, &rx_list, list) + skb->protocol = eth_type_trans(skb, ndev); + netif_receive_skb_list(&rx_list); + + netif_dbg(ag, rx_status, ndev, "rx finish, curr=%u, dirty=%u, done=%d\n", + ring->curr, ring->dirty, done); + + return done; +} + +static int ag71xx_poll(struct napi_struct *napi, int limit) +{ + struct ag71xx *ag = container_of(napi, struct ag71xx, napi); + struct ag71xx_ring *rx_ring = &ag->rx_ring; + int rx_ring_size = BIT(rx_ring->order); + struct net_device *ndev = ag->ndev; + int tx_done, rx_done; + u32 status; + + tx_done = ag71xx_tx_packets(ag, false); + + netif_dbg(ag, rx_status, ndev, "processing RX ring\n"); + rx_done = ag71xx_rx_packets(ag, limit); + + if (!rx_ring->buf[rx_ring->dirty % rx_ring_size].rx.rx_buf) + goto oom; + + status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS); + if (unlikely(status & RX_STATUS_OF)) { + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_OF); + ndev->stats.rx_fifo_errors++; + + /* restart RX */ + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE); + } + + if (rx_done < limit) { + if (status & RX_STATUS_PR) + goto more; + + status = ag71xx_rr(ag, AG71XX_REG_TX_STATUS); + if (status & TX_STATUS_PS) + goto more; + + netif_dbg(ag, rx_status, ndev, "disable polling mode, rx=%d, tx=%d,limit=%d\n", + rx_done, tx_done, limit); + + napi_complete(napi); + + /* enable interrupts */ + ag71xx_int_enable(ag, AG71XX_INT_POLL); + return rx_done; + } + +more: + netif_dbg(ag, rx_status, ndev, "stay in polling mode, rx=%d, tx=%d, limit=%d\n", + rx_done, tx_done, limit); + return limit; + +oom: + netif_err(ag, rx_err, ndev, "out of memory\n"); + + mod_timer(&ag->oom_timer, jiffies + AG71XX_OOM_REFILL); + napi_complete(napi); + return 0; +} + +static irqreturn_t ag71xx_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct ag71xx *ag; + u32 status; + + ag = netdev_priv(ndev); + status = ag71xx_rr(ag, AG71XX_REG_INT_STATUS); + + if (unlikely(!status)) + return IRQ_NONE; + + if (unlikely(status & AG71XX_INT_ERR)) { + if (status & AG71XX_INT_TX_BE) { + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE); + netif_err(ag, intr, ndev, "TX BUS error\n"); + } + if (status & AG71XX_INT_RX_BE) { + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE); + netif_err(ag, intr, ndev, "RX BUS error\n"); + } + } + + if (likely(status & AG71XX_INT_POLL)) { + ag71xx_int_disable(ag, AG71XX_INT_POLL); + netif_dbg(ag, intr, ndev, "enable polling mode\n"); + napi_schedule(&ag->napi); + } + + return IRQ_HANDLED; +} + +static int ag71xx_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct ag71xx *ag = netdev_priv(ndev); + + ndev->mtu = new_mtu; + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, + 
ag71xx_max_frame_len(ndev->mtu)); + + return 0; +} + +static const struct net_device_ops ag71xx_netdev_ops = { + .ndo_open = ag71xx_open, + .ndo_stop = ag71xx_stop, + .ndo_start_xmit = ag71xx_hard_start_xmit, + .ndo_do_ioctl = ag71xx_do_ioctl, + .ndo_tx_timeout = ag71xx_tx_timeout, + .ndo_change_mtu = ag71xx_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +static const u32 ar71xx_addr_ar7100[] = { + 0x19000000, 0x1a000000, +}; + +static int ag71xx_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + const struct ag71xx_dcfg *dcfg; + struct net_device *ndev; + struct resource *res; + const void *mac_addr; + int tx_size, err, i; + struct ag71xx *ag; + + if (!np) + return -ENODEV; + + ndev = devm_alloc_etherdev(&pdev->dev, sizeof(*ag)); + if (!ndev) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + dcfg = of_device_get_match_data(&pdev->dev); + if (!dcfg) + return -EINVAL; + + ag = netdev_priv(ndev); + ag->mac_idx = -1; + for (i = 0; i < ARRAY_SIZE(ar71xx_addr_ar7100); i++) { + if (ar71xx_addr_ar7100[i] == res->start) + ag->mac_idx = i; + } + + if (ag->mac_idx < 0) { + netif_err(ag, probe, ndev, "unknown mac idx\n"); + return -EINVAL; + } + + ag->clk_eth = devm_clk_get(&pdev->dev, "eth"); + if (IS_ERR(ag->clk_eth)) { + netif_err(ag, probe, ndev, "Failed to get eth clk.\n"); + return PTR_ERR(ag->clk_eth); + } + + SET_NETDEV_DEV(ndev, &pdev->dev); + + ag->pdev = pdev; + ag->ndev = ndev; + ag->dcfg = dcfg; + ag->msg_enable = netif_msg_init(-1, AG71XX_DEFAULT_MSG_ENABLE); + memcpy(ag->fifodata, dcfg->fifodata, sizeof(ag->fifodata)); + + ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); + if (IS_ERR(ag->mac_reset)) { + netif_err(ag, probe, ndev, "missing mac reset\n"); + err = PTR_ERR(ag->mac_reset); + goto err_free; + } + + ag->mac_base = devm_ioremap_nocache(&pdev->dev, res->start, + res->end - res->start + 1); + if (!ag->mac_base) { + err = -ENOMEM; + goto err_free; + } + + ndev->irq = platform_get_irq(pdev, 0); + err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, + 0x0, dev_name(&pdev->dev), ndev); + if (err) { + netif_err(ag, probe, ndev, "unable to request IRQ %d\n", + ndev->irq); + goto err_free; + } + + ndev->netdev_ops = &ag71xx_netdev_ops; + + INIT_DELAYED_WORK(&ag->restart_work, ag71xx_restart_work_func); + timer_setup(&ag->oom_timer, ag71xx_oom_timer_handler, 0); + + tx_size = AG71XX_TX_RING_SIZE_DEFAULT; + ag->rx_ring.order = ag71xx_ring_size_order(AG71XX_RX_RING_SIZE_DEFAULT); + + ndev->min_mtu = 68; + ndev->max_mtu = dcfg->max_frame_len - ag71xx_max_frame_len(0); + + ag->rx_buf_offset = NET_SKB_PAD; + if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130)) + ag->rx_buf_offset += NET_IP_ALIGN; + + if (ag71xx_is(ag, AR7100)) { + ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT; + tx_size *= AG71XX_TX_RING_DS_PER_PKT; + } + ag->tx_ring.order = ag71xx_ring_size_order(tx_size); + + ag->stop_desc = dmam_alloc_coherent(&pdev->dev, + sizeof(struct ag71xx_desc), + &ag->stop_desc_dma, GFP_KERNEL); + if (!ag->stop_desc) + goto err_free; + + ag->stop_desc->data = 0; + ag->stop_desc->ctrl = 0; + ag->stop_desc->next = (u32)ag->stop_desc_dma; + + mac_addr = of_get_mac_address(np); + if (mac_addr) + memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); + if (!mac_addr || !is_valid_ether_addr(ndev->dev_addr)) { + netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); + eth_random_addr(ndev->dev_addr); + } + + ag->phy_if_mode = 
of_get_phy_mode(np); + if (ag->phy_if_mode < 0) { + netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); + err = ag->phy_if_mode; + goto err_free; + } + + netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); + + err = clk_prepare_enable(ag->clk_eth); + if (err) { + netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); + goto err_free; + } + + ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); + + ag71xx_hw_init(ag); + + err = ag71xx_mdio_probe(ag); + if (err) + goto err_put_clk; + + platform_set_drvdata(pdev, ndev); + + err = register_netdev(ndev); + if (err) { + netif_err(ag, probe, ndev, "unable to register net device\n"); + platform_set_drvdata(pdev, NULL); + goto err_mdio_remove; + } + + netif_info(ag, probe, ndev, "Atheros AG71xx at 0x%08lx, irq %d, mode:%s\n", + (unsigned long)ag->mac_base, ndev->irq, + phy_modes(ag->phy_if_mode)); + + return 0; + +err_mdio_remove: + ag71xx_mdio_remove(ag); +err_put_clk: + clk_disable_unprepare(ag->clk_eth); +err_free: + free_netdev(ndev); + return err; +} + +static int ag71xx_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct ag71xx *ag; + + if (!ndev) + return 0; + + ag = netdev_priv(ndev); + unregister_netdev(ndev); + ag71xx_mdio_remove(ag); + clk_disable_unprepare(ag->clk_eth); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static const u32 ar71xx_fifo_ar7100[] = { + 0x0fff0000, 0x00001fff, 0x00780fff, +}; + +static const u32 ar71xx_fifo_ar9130[] = { + 0x0fff0000, 0x00001fff, 0x008001ff, +}; + +static const u32 ar71xx_fifo_ar9330[] = { + 0x0010ffff, 0x015500aa, 0x01f00140, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar7100 = { + .type = AR7100, + .fifodata = ar71xx_fifo_ar7100, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = false, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar7240 = { + .type = AR7240, + .fifodata = ar71xx_fifo_ar7100, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar9130 = { + .type = AR9130, + .fifodata = ar71xx_fifo_ar9130, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = false, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar9330 = { + .type = AR9330, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar9340 = { + .type = AR9340, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = SZ_16K - 1, + .desc_pktlen_mask = SZ_16K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_qca9530 = { + .type = QCA9530, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = SZ_16K - 1, + .desc_pktlen_mask = SZ_16K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_qca9550 = { + .type = QCA9550, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_16K - 1, + .tx_hang_workaround = true, +}; + +static const struct of_device_id ag71xx_match[] = { + { .compatible = "qca,ar7100-eth", .data = &ag71xx_dcfg_ar7100 }, + { .compatible = "qca,ar7240-eth", .data = &ag71xx_dcfg_ar7240 }, + { .compatible = "qca,ar7241-eth", .data = &ag71xx_dcfg_ar7240 }, + { .compatible = "qca,ar7242-eth", .data = &ag71xx_dcfg_ar7240 }, + { .compatible = "qca,ar9130-eth", .data = &ag71xx_dcfg_ar9130 }, + { .compatible = "qca,ar9330-eth", .data = &ag71xx_dcfg_ar9330 }, + { .compatible = "qca,ar9340-eth", 
.data = &ag71xx_dcfg_ar9340 }, + { .compatible = "qca,qca9530-eth", .data = &ag71xx_dcfg_qca9530 }, + { .compatible = "qca,qca9550-eth", .data = &ag71xx_dcfg_qca9550 }, + { .compatible = "qca,qca9560-eth", .data = &ag71xx_dcfg_qca9550 }, + {} +}; + +static struct platform_driver ag71xx_driver = { + .probe = ag71xx_probe, + .remove = ag71xx_remove, + .driver = { + .name = "ag71xx", + .of_match_table = ag71xx_match, + } +}; + +module_platform_driver(ag71xx_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 25bf085324b8..be7f9cebb675 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2201,7 +2201,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct atl1c_adapter *adapter = netdev_priv(netdev); - u16 tpd_req = 1; + u16 tpd_req; struct atl1c_tpd_desc *tpd; enum atl1c_trans_queue type = atl1c_trans_normal; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index b123509d385f..e9017caf024d 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -8,6 +8,7 @@ config NET_VENDOR_BROADCOM default y depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ SIBYTE_SB1xxx_SOC + select DIMLIB ---help--- If you have a network (Ethernet) chipset belonging to this class, say Y. @@ -198,6 +199,7 @@ config BNXT select FW_LOADER select LIBCRC32C select NET_DEVLINK + select PAGE_POOL ---help--- This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit Ethernet cards. To compile this driver as a module, choose M here: diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 85e610210477..291e4afd4a1a 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2659,7 +2659,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; priv = netdev_priv(dev); - memset(priv, 0, sizeof(*priv)); /* initialize default and fetch platform data */ priv->enet_is_sw = true; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index cae9b77ff44b..b9c5cea8db16 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -609,7 +609,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) { struct bcm_sysport_priv *priv = netdev_priv(dev); - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; unsigned int i; @@ -992,7 +992,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) { struct bcm_sysport_priv *priv = container_of(napi, struct bcm_sysport_priv, napi); - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; unsigned int work_done = 0; work_done = bcm_sysport_desc_rx(priv, budget); @@ -1016,8 +1016,8 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) } if (priv->dim.use_dim) { - net_dim_sample(priv->dim.event_ctr, priv->dim.packets, - priv->dim.bytes, &dim_sample); + dim_update_sample(priv->dim.event_ctr, priv->dim.packets, + priv->dim.bytes, &dim_sample); net_dim(&priv->dim.dim, dim_sample); } @@ -1087,16 +1087,16 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) static void bcm_sysport_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, 
struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct bcm_sysport_net_dim *ndim = container_of(dim, struct bcm_sysport_net_dim, dim); struct bcm_sysport_priv *priv = container_of(ndim, struct bcm_sysport_priv, dim); - struct net_dim_cq_moder cur_profile = - net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode, + dim->profile_ix); bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } /* RX and misc interrupt routine */ @@ -1437,7 +1437,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv, struct bcm_sysport_net_dim *dim = &priv->dim; INIT_WORK(&dim->dim.work, cb); - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; dim->event_ctr = 0; dim->packets = 0; dim->bytes = 0; @@ -1446,7 +1446,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv, static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv) { struct bcm_sysport_net_dim *dim = &priv->dim; - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; usecs = priv->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 86193931203a..6d80735fbc7f 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -11,7 +11,7 @@ #include <linux/bitmap.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> -#include <linux/net_dim.h> +#include <linux/dim.h> /* Receive/transmit descriptor format */ #define DESC_ADDR_HI_STATUS_LEN 0x00 @@ -702,7 +702,7 @@ struct bcm_sysport_net_dim { u16 event_ctr; unsigned long packets; unsigned long bytes; - struct net_dim dim; + struct dim dim; }; /* Software view of the TX ring */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 008ad0ca89ba..656ed80647f0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -684,7 +684,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask) if (unlikely(gfpflags_allow_blocking(gfp_mask))) return (void *)__get_free_page(gfp_mask); - return netdev_alloc_frag(fp->rx_frag_size); + return napi_alloc_frag(fp->rx_frag_size); } return kmalloc(fp->rx_buf_size + NET_SKB_PAD, gfp_mask); @@ -3857,9 +3857,12 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { if (!(bp->flags & TX_TIMESTAMPING_EN)) { + bp->eth_stats.ptp_skip_tx_ts++; BNX2X_ERR("Tx timestamping was not enabled, this packet will not be timestamped\n"); } else if (bp->ptp_tx_skb) { - BNX2X_ERR("The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + bp->eth_stats.ptp_skip_tx_ts++; + netdev_err_once(bp->dev, + "Device supports only a single outstanding packet to timestamp, this packet won't be timestamped\n"); } else { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* schedule check for Tx timestamp */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 51fc845de31a..4a0ba6801c9e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -182,7 +182,9 @@ static const 
struct { { STATS_OFFSET32(driver_filtered_tx_pkt), 4, false, "driver_filtered_tx_pkt" }, { STATS_OFFSET32(eee_tx_lpi), - 4, true, "Tx LPI entry count"} + 4, true, "Tx LPI entry count"}, + { STATS_OFFSET32(ptp_skip_tx_ts), + 4, false, "ptp_skipped_tx_tstamp" }, }; #define BNX2X_NUM_STATS ARRAY_SIZE(bnx2x_stats_arr) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 03ac10b1cd1e..2cc14db8f0ec 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -15214,11 +15214,24 @@ static void bnx2x_ptp_task(struct work_struct *work) u32 val_seq; u64 timestamp, ns; struct skb_shared_hwtstamps shhwtstamps; + bool bail = true; + int i; + + /* FW may take a while to complete timestamping; try a bit and if it's + * still not complete, may indicate an error state - bail out then. + */ + for (i = 0; i < 10; i++) { + /* Read Tx timestamp registers */ + val_seq = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_SEQID : + NIG_REG_P0_TLLH_PTP_BUF_SEQID); + if (val_seq & 0x10000) { + bail = false; + break; + } + msleep(1 << i); + } - /* Read Tx timestamp registers */ - val_seq = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_SEQID : - NIG_REG_P0_TLLH_PTP_BUF_SEQID); - if (val_seq & 0x10000) { + if (!bail) { /* There is a valid timestamp value */ timestamp = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_TS_MSB : NIG_REG_P0_TLLH_PTP_BUF_TS_MSB); @@ -15233,16 +15246,18 @@ static void bnx2x_ptp_task(struct work_struct *work) memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(bp->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(bp->ptp_tx_skb); - bp->ptp_tx_skb = NULL; DP(BNX2X_MSG_PTP, "Tx timestamp, timestamp cycles = %llu, ns = %llu\n", timestamp, ns); } else { - DP(BNX2X_MSG_PTP, "There is no valid Tx timestamp yet\n"); - /* Reschedule to keep checking for a valid timestamp value */ - schedule_work(&bp->ptp_task); + DP(BNX2X_MSG_PTP, + "Tx timestamp is not recorded (register read=%u)\n", + val_seq); + bp->eth_stats.ptp_skip_tx_ts++; } + + dev_kfree_skb_any(bp->ptp_tx_skb); + bp->ptp_tx_skb = NULL; } void bnx2x_set_rx_ts(struct bnx2x *bp, struct sk_buff *skb) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h index b2644ed13d06..d55e63692cf3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h @@ -207,6 +207,9 @@ struct bnx2x_eth_stats { u32 driver_filtered_tx_pkt; /* src: Clear-on-Read register; Will not survive PMF Migration */ u32 eee_tx_lpi; + + /* PTP */ + u32 ptp_skip_tx_ts; }; struct bnx2x_eth_q_stats { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f758b2e0591f..3f632028eff0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -54,6 +54,7 @@ #include <net/pkt_cls.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> +#include <net/page_pool.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -668,19 +669,20 @@ next_tx_int: } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, + struct bnxt_rx_ring_info *rxr, gfp_t gfp) { struct device *dev = &bp->pdev->dev; struct page *page; - page = alloc_page(gfp); + page = page_pool_dev_alloc_pages(rxr->page_pool); if (!page) return NULL; *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); if 
(dma_mapping_error(dev, *mapping)) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } *mapping += bp->rx_dma_offset; @@ -716,7 +718,8 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, dma_addr_t mapping; if (BNXT_RX_PAGE_MODE(bp)) { - struct page *page = __bnxt_alloc_rx_page(bp, &mapping, gfp); + struct page *page = + __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp); if (!page) return -ENOMEM; @@ -1989,6 +1992,9 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } + if (event & BNXT_REDIRECT_EVENT) + xdp_do_flush_map(); + if (event & BNXT_TX_EVENT) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; u16 prod = txr->tx_prod; @@ -2130,12 +2136,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) } } if (bp->flags & BNXT_FLAG_DIM) { - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; - net_dim_sample(cpr->event_ctr, - cpr->rx_packets, - cpr->rx_bytes, - &dim_sample); + dim_update_sample(cpr->event_ctr, + cpr->rx_packets, + cpr->rx_bytes, + &dim_sample); net_dim(&cpr->dim, dim_sample); } return work_done; @@ -2254,9 +2260,23 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) for (j = 0; j < max_idx;) { struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j]; - struct sk_buff *skb = tx_buf->skb; + struct sk_buff *skb; int k, last; + if (i < bp->tx_nr_rings_xdp && + tx_buf->action == XDP_REDIRECT) { + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + dma_unmap_len(tx_buf, len), + PCI_DMA_TODEVICE); + xdp_return_frame(tx_buf->xdpf); + tx_buf->action = 0; + tx_buf->xdpf = NULL; + j++; + continue; + } + + skb = tx_buf->skb; if (!skb) { j++; continue; @@ -2343,7 +2363,7 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); - __free_page(data); + page_pool_recycle_direct(rxr->page_pool, data); } else { dma_unmap_single_attrs(&pdev->dev, mapping, bp->rx_buf_use_size, @@ -2480,6 +2500,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp) if (xdp_rxq_info_is_reg(&rxr->xdp_rxq)) xdp_rxq_info_unreg(&rxr->xdp_rxq); + page_pool_destroy(rxr->page_pool); + rxr->page_pool = NULL; + kfree(rxr->rx_tpa); rxr->rx_tpa = NULL; @@ -2494,6 +2517,26 @@ static void bnxt_free_rx_rings(struct bnxt *bp) } } +static int bnxt_alloc_rx_page_pool(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr) +{ + struct page_pool_params pp = { 0 }; + + pp.pool_size = bp->rx_ring_size; + pp.nid = dev_to_node(&bp->pdev->dev); + pp.dev = &bp->pdev->dev; + pp.dma_dir = DMA_BIDIRECTIONAL; + + rxr->page_pool = page_pool_create(&pp); + if (IS_ERR(rxr->page_pool)) { + int err = PTR_ERR(rxr->page_pool); + + rxr->page_pool = NULL; + return err; + } + return 0; +} + static int bnxt_alloc_rx_rings(struct bnxt *bp) { int i, rc, agg_rings = 0, tpa_rings = 0; @@ -2513,10 +2556,22 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) ring = &rxr->rx_ring_struct; + rc = bnxt_alloc_rx_page_pool(bp, rxr); + if (rc) + return rc; + rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i); if (rc < 0) return rc; + rc = xdp_rxq_info_reg_mem_model(&rxr->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rxr->page_pool); + if (rc) { + xdp_rxq_info_unreg(&rxr->xdp_rxq); + return rc; + } + rc = bnxt_alloc_ring(bp, &ring->ring_mem); if (rc) return rc; @@ -5508,7 +5563,16 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp) static int bnxt_get_func_stat_ctxs(struct bnxt *bp) { - return bp->cp_nr_rings + bnxt_get_ulp_stat_ctxs(bp); + int ulp_stat = bnxt_get_ulp_stat_ctxs(bp); + int cp = 
bp->cp_nr_rings; + + if (!ulp_stat) + return cp; + + if (bnxt_nq_rings_in_use(bp) > cp + bnxt_get_ulp_msix_num(bp)) + return bnxt_get_ulp_msix_base(bp) + ulp_stat; + + return cp + ulp_stat; } static bool bnxt_need_reserve_rings(struct bnxt *bp) @@ -7477,11 +7541,7 @@ unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp) unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp) { - unsigned int stat; - - stat = bnxt_get_max_func_stat_ctxs(bp) - bnxt_get_ulp_stat_ctxs(bp); - stat -= bp->cp_nr_rings; - return stat; + return bnxt_get_max_func_stat_ctxs(bp) - bnxt_get_func_stat_ctxs(bp); } int bnxt_get_avail_msix(struct bnxt *bp, int num) @@ -7813,7 +7873,7 @@ static void bnxt_enable_napi(struct bnxt *bp) if (bp->bnapi[i]->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); - cpr->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } napi_enable(&bp->bnapi[i]->napi); } @@ -9847,32 +9907,19 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int bnxt_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct bnxt *bp = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb, - bp, bp, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(bnxt_block_cb_list); static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct bnxt *bp = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return bnxt_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &bnxt_block_cb_list, + bnxt_setup_tc_block_cb, + bp, bp, true); case TC_SETUP_QDISC_MQPRIO: { struct tc_mqprio_qopt *mqprio = type_data; @@ -10233,6 +10280,7 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, .ndo_bpf = bnxt_xdp, + .ndo_xdp_xmit = bnxt_xdp_xmit, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, .ndo_get_devlink_port = bnxt_get_devlink_port, @@ -10262,10 +10310,10 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_dcb_free(bp); kfree(bp->edev); bp->edev = NULL; + bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; - bnxt_cleanup_pci(bp); bnxt_free_port_stats(bp); free_netdev(dev); } @@ -10859,6 +10907,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) { bnxt_clear_int_mode(bp); + pci_disable_device(pdev); pci_wake_from_d3(pdev, bp->wol); pci_set_power_state(pdev, PCI_D3hot); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index be438d82f939..16694b704d15 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -24,7 +24,9 @@ #include <net/devlink.h> #include <net/dst_metadata.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> + +struct page_pool; struct tx_bd { __le32 tx_bd_len_flags_type; @@ -587,15 +589,21 @@ struct nqe_cn { #define BNXT_HWRM_CHNL_CHIMP 0 #define BNXT_HWRM_CHNL_KONG 1 -#define BNXT_RX_EVENT 1 -#define BNXT_AGG_EVENT 2 -#define BNXT_TX_EVENT 4 +#define BNXT_RX_EVENT 1 +#define BNXT_AGG_EVENT 2 +#define BNXT_TX_EVENT 4 +#define 
BNXT_REDIRECT_EVENT 8 struct bnxt_sw_tx_bd { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; DEFINE_DMA_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_LEN(len); u8 is_gso; u8 is_push; + u8 action; union { unsigned short nr_frags; u16 rx_prod; @@ -793,6 +801,7 @@ struct bnxt_rx_ring_info { struct bnxt_ring_struct rx_ring_struct; struct bnxt_ring_struct rx_agg_ring_struct; struct xdp_rxq_info xdp_rxq; + struct page_pool *page_pool; }; struct bnxt_cp_ring_info { @@ -810,7 +819,7 @@ struct bnxt_cp_ring_info { u64 rx_bytes; u64 event_ctr; - struct net_dim dim; + struct dim dim; union { struct tx_cmp *cp_desc_ring[MAX_CP_PAGES]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 70775158c8c4..07301cb87c03 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -396,7 +396,7 @@ static int bnxt_hwrm_queue_dscp_qcaps(struct bnxt *bp) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_DSCP_QCAPS, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { bp->max_dscp_value = (1 << resp->num_dscp_bits) - 1; if (bp->max_dscp_value < 0x3f) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c index 94e208e9789f..61393f351a77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/pci.h> #include "bnxt_hsi.h" -#include <linux/net_dim.h> +#include <linux/dim.h> #include "bnxt.h" #include "bnxt_debugfs.h" @@ -21,7 +21,7 @@ static ssize_t debugfs_dim_read(struct file *filep, char __user *buffer, size_t count, loff_t *ppos) { - struct net_dim *dim = filep->private_data; + struct dim *dim = filep->private_data; int len; char *buf; @@ -61,7 +61,7 @@ static const struct file_operations debugfs_dim_fops = { .read = debugfs_dim_read, }; -static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx, +static struct dentry *debugfs_dim_ring_init(struct dim *dim, int ring_idx, struct dentry *dd) { static char qname[16]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index afa97c8bb081..6f6576dc417a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -7,26 +7,25 @@ * the Free Software Foundation. 
*/ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "bnxt_hsi.h" #include "bnxt.h" void bnxt_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, - work); + struct dim *dim = container_of(work, struct dim, work); struct bnxt_cp_ring_info *cpr = container_of(dim, struct bnxt_cp_ring_info, dim); struct bnxt_napi *bnapi = container_of(cpr, struct bnxt_napi, cp_ring); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); cpr->rx_ring_coal.coal_ticks = cur_moder.usec; cpr->rx_ring_coal.coal_bufs = cur_moder.pkts; bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a6c7baf38036..c7ee63d69679 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2799,7 +2799,7 @@ static int bnxt_run_loopback(struct bnxt *bp) dev_kfree_skb(skb); return -EIO; } - bnxt_xmit_xdp(bp, txr, map, pkt_size, 0); + bnxt_xmit_bd(bp, txr, map, pkt_size); /* Sync BD data before updating doorbell */ wmb(); @@ -2842,7 +2842,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bool offline = false; u8 test_results = 0; u8 test_mask = 0; - int rc, i; + int rc = 0, i; if (!bp->num_tests || !BNXT_SINGLE_PF(bp)) return; @@ -2913,9 +2913,9 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } bnxt_hwrm_phy_loopback(bp, false, false); bnxt_half_close_nic(bp); - bnxt_open_nic(bp, false, true); + rc = bnxt_open_nic(bp, false, true); } - if (bnxt_test_irq(bp)) { + if (rc || bnxt_test_irq(bp)) { buf[BNXT_IRQ_TEST_IDX] = 1; etest->flags |= ETH_TEST_FL_FAILED; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 44d6c5743fb9..6fe4a7174271 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -170,10 +170,10 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, } static int bnxt_tc_parse_flow(struct bnxt *bp, - struct tc_cls_flower_offload *tc_flow_cmd, + struct flow_cls_offload *tc_flow_cmd, struct bnxt_tc_flow *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(tc_flow_cmd); + struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd); struct flow_dissector *dissector = rule->match.dissector; /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */ @@ -1262,7 +1262,7 @@ static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow, * The hash-tables are already protected by the rhashtable API. 
*/ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *tc_flow_cmd) + struct flow_cls_offload *tc_flow_cmd) { struct bnxt_tc_flow_node *new_node, *old_node; struct bnxt_tc_info *tc_info = bp->tc_info; @@ -1348,7 +1348,7 @@ done: } static int bnxt_tc_del_flow(struct bnxt *bp, - struct tc_cls_flower_offload *tc_flow_cmd) + struct flow_cls_offload *tc_flow_cmd) { struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *flow_node; @@ -1363,7 +1363,7 @@ static int bnxt_tc_del_flow(struct bnxt *bp, } static int bnxt_tc_get_flow_stats(struct bnxt *bp, - struct tc_cls_flower_offload *tc_flow_cmd) + struct flow_cls_offload *tc_flow_cmd) { struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats; struct bnxt_tc_info *tc_info = bp->tc_info; @@ -1585,14 +1585,14 @@ void bnxt_tc_flow_stats_work(struct bnxt *bp) } int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return bnxt_tc_add_flow(bp, src_fid, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return bnxt_tc_del_flow(bp, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return bnxt_tc_get_flow_stats(bp, cls_flower); default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 8a0968967bc5..ffec57d1a5ec 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -196,7 +196,7 @@ struct bnxt_tc_flow_node { }; int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *cls_flower); + struct flow_cls_offload *cls_flower); int bnxt_init_tc(struct bnxt *bp); void bnxt_shutdown_tc(struct bnxt *bp); void bnxt_tc_flow_stats_work(struct bnxt *bp); @@ -209,7 +209,7 @@ static inline bool bnxt_tc_flower_enabled(struct bnxt *bp) #else /* CONFIG_BNXT_FLOWER_OFFLOAD */ static inline int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index bfa342a98d08..fc77caf0a076 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -157,8 +157,10 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id, if (BNXT_NEW_RM(bp)) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; + int resv_msix; - avail_msix = hw_resc->resv_irqs - bp->cp_nr_rings; + resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings; + avail_msix = min_t(int, resv_msix, avail_msix); edev->ulp_tbl[ulp_id].msix_requested = avail_msix; } bnxt_fill_msix_vecs(bp, ent); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index f760921389a3..f9bf7d7250ab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -161,34 +161,19 @@ static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type, } } -static int bnxt_vf_rep_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct bnxt_vf_rep *vf_rep = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - 
bnxt_vf_rep_setup_tc_block_cb, - vf_rep, vf_rep, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, - bnxt_vf_rep_setup_tc_block_cb, vf_rep); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(bnxt_vf_block_cb_list); static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return bnxt_vf_rep_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &bnxt_vf_block_cb_list, + bnxt_vf_rep_setup_tc_block_cb, + vf_rep, vf_rep, true); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 0184ef6f05a7..c6f6f2033880 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -15,12 +15,14 @@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/filter.h> +#include <net/page_pool.h> #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_xdp.h" -void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - dma_addr_t mapping, u32 len, u16 rx_prod) +struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len) { struct bnxt_sw_tx_bd *tx_buf; struct tx_bd *txbd; @@ -29,7 +31,6 @@ void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, prod = txr->tx_prod; tx_buf = &txr->tx_buf_ring[prod]; - tx_buf->rx_prod = rx_prod; txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; flags = (len << TX_BD_LEN_SHIFT) | (1 << TX_BD_FLAGS_BD_CNT_SHIFT) | @@ -40,30 +41,67 @@ void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, prod = NEXT_TX(prod); txr->tx_prod = prod; + return tx_buf; +} + +static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len, u16 rx_prod) +{ + struct bnxt_sw_tx_bd *tx_buf; + + tx_buf = bnxt_xmit_bd(bp, txr, mapping, len); + tx_buf->rx_prod = rx_prod; + tx_buf->action = XDP_TX; +} + +static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len, + struct xdp_frame *xdpf) +{ + struct bnxt_sw_tx_bd *tx_buf; + + tx_buf = bnxt_xmit_bd(bp, txr, mapping, len); + tx_buf->action = XDP_REDIRECT; + tx_buf->xdpf = xdpf; + dma_unmap_addr_set(tx_buf, mapping, mapping); + dma_unmap_len_set(tx_buf, len, 0); } void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; + bool rx_doorbell_needed = false; struct bnxt_sw_tx_bd *tx_buf; u16 tx_cons = txr->tx_cons; u16 last_tx_cons = tx_cons; - u16 rx_prod; int i; for (i = 0; i < nr_pkts; i++) { - last_tx_cons = tx_cons; + tx_buf = &txr->tx_buf_ring[tx_cons]; + + if (tx_buf->action == XDP_REDIRECT) { + struct pci_dev *pdev = bp->pdev; + + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + dma_unmap_len(tx_buf, len), + PCI_DMA_TODEVICE); + xdp_return_frame(tx_buf->xdpf); + tx_buf->action = 0; + tx_buf->xdpf = NULL; + } else if (tx_buf->action == XDP_TX) { + rx_doorbell_needed = true; + last_tx_cons = tx_cons; + } tx_cons = NEXT_TX(tx_cons); } txr->tx_cons = tx_cons; - if (bnxt_tx_avail(bp, txr) == bp->tx_ring_size) { - rx_prod = rxr->rx_prod; - } else { + if (rx_doorbell_needed) { tx_buf = &txr->tx_buf_ring[last_tx_cons]; - rx_prod = tx_buf->rx_prod; + bnxt_db_write(bp, &rxr->rx_db, tx_buf->rx_prod); } - 
bnxt_db_write(bp, &rxr->rx_db, rx_prod); } /* returns the following: @@ -88,19 +126,19 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, return false; pdev = bp->pdev; - txr = rxr->bnapi->tx_ring; rx_buf = &rxr->rx_buf_ring[cons]; offset = bp->rx_offset; + mapping = rx_buf->mapping - bp->rx_dma_offset; + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); + + txr = rxr->bnapi->tx_ring; xdp.data_hard_start = *data_ptr - offset; xdp.data = *data_ptr; xdp_set_data_meta_invalid(&xdp); xdp.data_end = *data_ptr + *len; xdp.rxq = &rxr->xdp_rxq; orig_data = xdp.data; - mapping = rx_buf->mapping - bp->rx_dma_offset; - - dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); rcu_read_lock(); act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -132,10 +170,34 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, *event = BNXT_TX_EVENT; dma_sync_single_for_device(&pdev->dev, mapping + offset, *len, bp->rx_dir); - bnxt_xmit_xdp(bp, txr, mapping + offset, *len, - NEXT_RX(rxr->rx_prod)); + __bnxt_xmit_xdp(bp, txr, mapping + offset, *len, + NEXT_RX(rxr->rx_prod)); bnxt_reuse_rx_data(rxr, cons, page); return true; + case XDP_REDIRECT: + /* if we are calling this here then we know that the + * redirect is coming from a frame received by the + * bnxt_en driver. + */ + dma_unmap_page_attrs(&pdev->dev, mapping, + PAGE_SIZE, bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); + + /* if we are unable to allocate a new buffer, abort and reuse */ + if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { + trace_xdp_exception(bp->dev, xdp_prog, act); + bnxt_reuse_rx_data(rxr, cons, page); + return true; + } + + if (xdp_do_redirect(bp->dev, &xdp, xdp_prog)) { + trace_xdp_exception(bp->dev, xdp_prog, act); + page_pool_recycle_direct(rxr->page_pool, page); + return true; + } + + *event |= BNXT_REDIRECT_EVENT; + break; default: bpf_warn_invalid_xdp_action(act); /* Fall thru */ @@ -149,6 +211,56 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, return true; } +int bnxt_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags) +{ + struct bnxt *bp = netdev_priv(dev); + struct bpf_prog *xdp_prog = READ_ONCE(bp->xdp_prog); + struct pci_dev *pdev = bp->pdev; + struct bnxt_tx_ring_info *txr; + dma_addr_t mapping; + int drops = 0; + int ring; + int i; + + if (!test_bit(BNXT_STATE_OPEN, &bp->state) || + !bp->tx_nr_rings_xdp || + !xdp_prog) + return -EINVAL; + + ring = smp_processor_id() % bp->tx_nr_rings_xdp; + txr = &bp->tx_ring[ring]; + + for (i = 0; i < num_frames; i++) { + struct xdp_frame *xdp = frames[i]; + + if (!txr || !bnxt_tx_avail(bp, txr) || + !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) { + xdp_return_frame_rx_napi(xdp); + drops++; + continue; + } + + mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len, + DMA_TO_DEVICE); + + if (dma_mapping_error(&pdev->dev, mapping)) { + xdp_return_frame_rx_napi(xdp); + drops++; + continue; + } + __bnxt_xmit_xdp_redirect(bp, txr, mapping, xdp->len, xdp); + } + + if (flags & XDP_XMIT_FLUSH) { + /* Sync BD data before updating doorbell */ + wmb(); + bnxt_db_write(bp, &txr->tx_db, txr->tx_prod); + } + + return num_frames - drops; +} + /* Under rtnl_lock */ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 414b748038ca..0df40c3beb05 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -10,12 +10,15 @@ #ifndef BNXT_XDP_H #define BNXT_XDP_H -void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - dma_addr_t mapping, u32 len, u16 rx_prod); +struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len); void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); +int bnxt_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags); #endif diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 41b50e6570ea..34466b827dde 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -640,7 +640,7 @@ static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring, static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring, struct ethtool_coalesce *ec) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; ring->rx_coalesce_usecs = ec->rx_coalesce_usecs; @@ -1895,7 +1895,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) { struct bcmgenet_rx_ring *ring = container_of(napi, struct bcmgenet_rx_ring, napi); - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; unsigned int work_done; work_done = bcmgenet_desc_rx(ring, budget); @@ -1906,8 +1906,8 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) } if (ring->dim.use_dim) { - net_dim_sample(ring->dim.event_ctr, ring->dim.packets, - ring->dim.bytes, &dim_sample); + dim_update_sample(ring->dim.event_ctr, ring->dim.packets, + ring->dim.bytes, &dim_sample); net_dim(&ring->dim.dim, dim_sample); } @@ -1916,16 +1916,16 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) static void bcmgenet_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct bcmgenet_net_dim *ndim = container_of(dim, struct bcmgenet_net_dim, dim); struct bcmgenet_rx_ring *ring = container_of(ndim, struct bcmgenet_rx_ring, dim); - struct net_dim_cq_moder cur_profile = + struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } /* Assign skb to RX DMA descriptor. 
*/ @@ -2082,7 +2082,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring, struct bcmgenet_net_dim *dim = &ring->dim; INIT_WORK(&dim->dim.work, cb); - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; dim->event_ctr = 0; dim->packets = 0; dim->bytes = 0; @@ -2091,7 +2091,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring, static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring) { struct bcmgenet_net_dim *dim = &ring->dim; - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; usecs = ring->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 9ad835aee1bc..4a8fc03d82fd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -13,7 +13,7 @@ #include <linux/mii.h> #include <linux/if_vlan.h> #include <linux/phy.h> -#include <linux/net_dim.h> +#include <linux/dim.h> /* total number of Buffer Descriptors, same for Rx/Tx */ #define TOTAL_DESC 256 @@ -578,7 +578,7 @@ struct bcmgenet_net_dim { u16 event_ctr; unsigned long packets; unsigned long bytes; - struct net_dim dim; + struct dim dim; }; struct bcmgenet_rx_ring { diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 6d1f9c822548..4c404d2213f9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6710,7 +6710,7 @@ static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr, skb_size = SKB_DATA_ALIGN(data_size + TG3_RX_OFFSET(tp)) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (skb_size <= PAGE_SIZE) { - data = netdev_alloc_frag(skb_size); + data = napi_alloc_frag(skb_size); *frag_size = skb_size; } else { data = kmalloc(skb_size, GFP_ATOMIC); diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 1766697c9c5a..f4b3bd85dfe3 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Atmel device configuration +# Cadence device configuration # config NET_VENDOR_CADENCE @@ -13,15 +13,15 @@ config NET_VENDOR_CADENCE If unsure, say Y. Note that the answer to this question doesn't directly affect the - kernel: saying N will just cause the configurator to skip all - the remaining Atmel network card questions. If you say Y, you will be + kernel: saying N will just cause the configurator to skip all the + remaining Cadence network card questions. If you say Y, you will be asked for your specific card in the following questions. if NET_VENDOR_CADENCE config MACB tristate "Cadence MACB/GEM support" - depends on HAS_DMA + depends on HAS_DMA && COMMON_CLK select PHYLIB ---help--- The Cadence MACB ethernet interface is found on many Atmel AT32 and @@ -42,7 +42,7 @@ config MACB_USE_HWSTAMP config MACB_PCI tristate "Cadence PCI MACB/GEM support" - depends on MACB && PCI && COMMON_CLK + depends on MACB && PCI ---help--- This is PCI wrapper for MACB driver. 
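The bcm_sysport, bnxt and bcmgenet hunks in this series all make the same mechanical conversion from the old <linux/net_dim.h> interface to the shared <linux/dim.h> library (selected via DIMLIB in the Broadcom Kconfig): struct net_dim becomes struct dim, net_dim_sample() becomes dim_update_sample(), and the NET_DIM_* constants become DIM_*. For reference, a minimal sketch of the converted pattern follows; struct my_ring, my_poll(), my_dim_work() and my_ring_write_coalesce() are hypothetical stand-ins for the per-driver ring state and coalescing register write-out, not code from this series.

#include <linux/dim.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>

/* Hypothetical per-ring state: only the fields the DIM pattern touches. */
struct my_ring {
	struct napi_struct napi;
	u16 event_ctr;		/* bumped by the IRQ handler */
	unsigned long packets;	/* accumulated by the RX path */
	unsigned long bytes;
	struct dim dim;
};

/* Stand-in for the device-specific coalescing register write. */
static void my_ring_write_coalesce(struct my_ring *ring, u16 usec, u16 pkts)
{
	/* program the device's RX interrupt coalescing registers here */
}

/* Apply the profile DIM picked, then re-arm the algorithm, as
 * bcm_sysport_dim_work(), bnxt_dim_work() and bcmgenet_dim_work() do.
 */
static void my_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct my_ring *ring = container_of(dim, struct my_ring, dim);
	struct dim_cq_moder moder =
		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);

	my_ring_write_coalesce(ring, moder.usec, moder.pkts);
	dim->state = DIM_START_MEASURE;
}

/* Called once at ring setup time, e.g. from probe or open. */
static void my_init_dim(struct my_ring *ring)
{
	INIT_WORK(&ring->dim.work, my_dim_work);
	ring->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}

/* Feed DIM one sample per NAPI poll; net_dim() schedules my_dim_work()
 * whenever it decides the moderation profile should change.
 */
static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_ring *ring = container_of(napi, struct my_ring, napi);
	struct dim_sample dim_sample;
	int work_done = 0;	/* RX processing elided */

	dim_update_sample(ring->event_ctr, ring->packets, ring->bytes,
			  &dim_sample);
	net_dim(&ring->dim, dim_sample);

	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}

The same three touch points — the sample feed in the NAPI poll, the profile apply in the work handler, and the DIM_START_MEASURE re-arm — appear in each converted driver above; only the register write-out differs per device.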
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 6ff123da6a14..03983bd46eef 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -496,7 +496,11 @@ /* Bitfields in TISUBN */ #define GEM_SUBNSINCR_OFFSET 0 -#define GEM_SUBNSINCR_SIZE 16 +#define GEM_SUBNSINCRL_OFFSET 24 +#define GEM_SUBNSINCRL_SIZE 8 +#define GEM_SUBNSINCRH_OFFSET 0 +#define GEM_SUBNSINCRH_SIZE 16 +#define GEM_SUBNSINCR_SIZE 24 /* Bitfields in TI */ #define GEM_NSINCR_OFFSET 0 @@ -834,6 +838,9 @@ struct gem_tx_ts { /* limit RX checksum offload to TCP and UDP packets */ #define GEM_RX_CSUM_CHECKED_MASK 2 +/* Scaled PPM fraction */ +#define PPM_FRACTION 16 + /* struct macb_tx_skb - data about an skb which is being transmitted * @skb: skb currently being transmitted, only set for the last buffer * of the frame @@ -1060,7 +1067,8 @@ struct macb_or_gem_ops { int (*mog_alloc_rx_buffers)(struct macb *bp); void (*mog_free_rx_buffers)(struct macb *bp); void (*mog_init_rings)(struct macb *bp); - int (*mog_rx)(struct macb_queue *queue, int budget); + int (*mog_rx)(struct macb_queue *queue, struct napi_struct *napi, + int budget); }; /* MACB-PTP interface: adapt to platform needs. */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 262a28ff81fc..5ca17e62dc3e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/clk.h> +#include <linux/clk-provider.h> #include <linux/crc32.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -37,6 +38,13 @@ #include <linux/pm_runtime.h> #include "macb.h" +/* This structure is only used for MACB on SiFive FU540 devices */ +struct sifive_fu540_macb_mgmt { + void __iomem *reg; + unsigned long rate; + struct clk_hw hw; +}; + #define MACB_RX_BUFFER_SIZE 128 #define RX_BUFFER_MULTIPLE 64 /* bytes */ @@ -981,7 +989,8 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin, */ } -static int gem_rx(struct macb_queue *queue, int budget) +static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, + int budget) { struct macb *bp = queue->bp; unsigned int len; @@ -1063,7 +1072,7 @@ static int gem_rx(struct macb_queue *queue, int budget) skb->data, 32, true); #endif - netif_receive_skb(skb); + napi_gro_receive(napi, skb); } gem_rx_refill(queue); @@ -1071,8 +1080,8 @@ static int gem_rx(struct macb_queue *queue, int budget) return count; } -static int macb_rx_frame(struct macb_queue *queue, unsigned int first_frag, - unsigned int last_frag) +static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi, + unsigned int first_frag, unsigned int last_frag) { unsigned int len; unsigned int frag; @@ -1148,7 +1157,7 @@ static int macb_rx_frame(struct macb_queue *queue, unsigned int first_frag, bp->dev->stats.rx_bytes += skb->len; netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", skb->len, skb->csum); - netif_receive_skb(skb); + napi_gro_receive(napi, skb); return 0; } @@ -1171,7 +1180,8 @@ static inline void macb_init_rx_ring(struct macb_queue *queue) queue->rx_tail = 0; } -static int macb_rx(struct macb_queue *queue, int budget) +static int macb_rx(struct macb_queue *queue, struct napi_struct *napi, + int budget) { struct macb *bp = queue->bp; bool reset_rx_queue = false; @@ -1208,7 +1218,7 @@ static int macb_rx(struct macb_queue *queue, int budget) continue; } - dropped = macb_rx_frame(queue, 
first_frag, tail); + dropped = macb_rx_frame(queue, napi, first_frag, tail); first_frag = -1; if (unlikely(dropped < 0)) { reset_rx_queue = true; @@ -1262,7 +1272,7 @@ static int macb_poll(struct napi_struct *napi, int budget) netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n", (unsigned long)status, budget); - work_done = bp->macbgem_ops.mog_rx(queue, budget); + work_done = bp->macbgem_ops.mog_rx(queue, napi, budget); if (work_done < budget) { napi_complete_done(napi, work_done); @@ -3477,7 +3487,7 @@ static int macb_init(struct platform_device *pdev) queue = &bp->queues[q]; queue->bp = bp; - netif_napi_add(dev, &queue->napi, macb_poll, 64); + netif_napi_add(dev, &queue->napi, macb_poll, NAPI_POLL_WEIGHT); if (hw_q) { queue->ISR = GEM_ISR(hw_q - 1); queue->IER = GEM_IER(hw_q - 1); @@ -3616,6 +3626,8 @@ static int macb_init(struct platform_device *pdev) /* max number of receive buffers */ #define AT91ETHER_MAX_RX_DESCR 9 +static struct sifive_fu540_macb_mgmt *mgmt; + /* Initialize and start the Receiver and Transmit subsystems */ static int at91ether_start(struct net_device *dev) { @@ -3943,6 +3955,116 @@ static int at91ether_init(struct platform_device *pdev) return 0; } +static unsigned long fu540_macb_tx_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + return mgmt->rate; +} + +static long fu540_macb_tx_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + if (WARN_ON(rate < 2500000)) + return 2500000; + else if (rate == 2500000) + return 2500000; + else if (WARN_ON(rate < 13750000)) + return 2500000; + else if (WARN_ON(rate < 25000000)) + return 25000000; + else if (rate == 25000000) + return 25000000; + else if (WARN_ON(rate < 75000000)) + return 25000000; + else if (WARN_ON(rate < 125000000)) + return 125000000; + else if (rate == 125000000) + return 125000000; + + WARN_ON(rate > 125000000); + + return 125000000; +} + +static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + rate = fu540_macb_tx_round_rate(hw, rate, &parent_rate); + if (rate != 125000000) + iowrite32(1, mgmt->reg); + else + iowrite32(0, mgmt->reg); + mgmt->rate = rate; + + return 0; +} + +static const struct clk_ops fu540_c000_ops = { + .recalc_rate = fu540_macb_tx_recalc_rate, + .round_rate = fu540_macb_tx_round_rate, + .set_rate = fu540_macb_tx_set_rate, +}; + +static int fu540_c000_clk_init(struct platform_device *pdev, struct clk **pclk, + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk, struct clk **tsu_clk) +{ + struct clk_init_data init; + int err = 0; + + err = macb_clk_init(pdev, pclk, hclk, tx_clk, rx_clk, tsu_clk); + if (err) + return err; + + mgmt = devm_kzalloc(&pdev->dev, sizeof(*mgmt), GFP_KERNEL); + if (!mgmt) + return -ENOMEM; + + init.name = "sifive-gemgxl-mgmt"; + init.ops = &fu540_c000_ops; + init.flags = 0; + init.num_parents = 0; + + mgmt->rate = 0; + mgmt->hw.init = &init; + + *tx_clk = clk_register(NULL, &mgmt->hw); + if (IS_ERR(*tx_clk)) + return PTR_ERR(*tx_clk); + + err = clk_prepare_enable(*tx_clk); + if (err) + dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + else + dev_info(&pdev->dev, "Registered clk switch '%s'\n", init.name); + + return 0; +} + +static int fu540_c000_init(struct platform_device *pdev) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return -ENODEV; + + mgmt->reg = ioremap(res->start, resource_size(res)); + if (!mgmt->reg) + return -ENOMEM; + + return macb_init(pdev); +} + +static const 
struct macb_config fu540_c000_config = { + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | + MACB_CAPS_GEM_HAS_PTP, + .dma_burst_length = 16, + .clk_init = fu540_c000_clk_init, + .init = fu540_c000_init, + .jumbo_max_len = 10240, +}; + static const struct macb_config at91sam9260_config = { .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, .clk_init = macb_clk_init, @@ -4032,6 +4154,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,emac", .data = &emac_config }, { .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config}, { .compatible = "cdns,zynq-gem", .data = &zynq_config }, + { .compatible = "sifive,fu540-macb", .data = &fu540_c000_config }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, macb_dt_ids); @@ -4239,6 +4362,7 @@ err_out_free_netdev: err_disable_clocks: clk_disable_unprepare(tx_clk); + clk_unregister(tx_clk); clk_disable_unprepare(hclk); clk_disable_unprepare(pclk); clk_disable_unprepare(rx_clk); @@ -4273,6 +4397,7 @@ static int macb_remove(struct platform_device *pdev) pm_runtime_dont_use_autosuspend(&pdev->dev); if (!pm_runtime_suspended(&pdev->dev)) { clk_disable_unprepare(bp->tx_clk); + clk_unregister(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); clk_disable_unprepare(bp->rx_clk); diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 0a8aca8d3634..43a3f0dbf857 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -104,7 +104,10 @@ static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec) * to take effect. */ spin_lock_irqsave(&bp->tsu_clk_lock, flags); - gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns)); + /* RegBit[15:0] = Subns[23:8]; RegBit[31:24] = Subns[7:0] */ + gem_writel(bp, TISUBN, GEM_BF(SUBNSINCRL, incr_spec->sub_ns) | + GEM_BF(SUBNSINCRH, (incr_spec->sub_ns >> + GEM_SUBNSINCRL_SIZE))); gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns)); spin_unlock_irqrestore(&bp->tsu_clk_lock, flags); @@ -135,7 +138,7 @@ static int gem_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) * (temp / USEC_PER_SEC) + 0.5 */ adj += (USEC_PER_SEC >> 1); - adj >>= GEM_SUBNSINCR_SIZE; /* remove fractions */ + adj >>= PPM_FRACTION; /* remove fractions */ adj = div_u64(adj, USEC_PER_SEC); adj = neg_adj ? 
(word - adj) : (word + adj); diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 11d4e91ea754..99f49d059414 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1855,7 +1855,7 @@ static void xgmac_pmt(void __iomem *ioaddr, unsigned long mode) static int xgmac_suspend(struct device *dev) { - struct net_device *ndev = platform_get_drvdata(to_platform_device(dev)); + struct net_device *ndev = dev_get_drvdata(dev); struct xgmac_priv *priv = netdev_priv(ndev); u32 value; @@ -1881,7 +1881,7 @@ static int xgmac_suspend(struct device *dev) static int xgmac_resume(struct device *dev) { - struct net_device *ndev = platform_get_drvdata(to_platform_device(dev)); + struct net_device *ndev = dev_get_drvdata(dev); struct xgmac_priv *priv = netdev_priv(ndev); void __iomem *ioaddr = priv->base; diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 91d8a885deba..20390f6afbb4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ - cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \ + cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o cxgb4_mps.o \ cudbg_common.o cudbg_lib.o cudbg_zlib.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a8fe0808823d..1fbb640e896a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -280,6 +280,7 @@ struct tp_params { unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ u32 vlan_pri_map; /* cached TP_VLAN_PRI_MAP */ + u32 filter_mask; u32 ingress_config; /* cached TP_INGRESS_CONFIG */ /* cached TP_OUT_CONFIG compressed error vector @@ -600,6 +601,7 @@ struct port_info { u8 vin; u8 vivld; u8 smt_idx; + u8 rx_cchan; }; struct dentry; @@ -878,6 +880,7 @@ struct uld_msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; unsigned int idx; + cpumask_var_t aff_mask; }; struct vf_info { @@ -902,10 +905,6 @@ struct mbox_list { struct list_head list; }; -struct mps_encap_entry { - atomic_t refcnt; -}; - #if IS_ENABLED(CONFIG_THERMAL) struct ch_thermal { struct thermal_zone_device *tzdev; @@ -914,6 +913,14 @@ struct ch_thermal { }; #endif +struct mps_entries_ref { + struct list_head list; + u8 addr[ETH_ALEN]; + u8 mask[ETH_ALEN]; + u16 idx; + refcount_t refcnt; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -938,9 +945,10 @@ struct adapter { struct cxgb4_virt_res vres; unsigned int swintr; - struct { + struct msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; + cpumask_var_t aff_mask; } msix_info[MAX_INGQ + 1]; struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ @@ -965,7 +973,6 @@ struct adapter { unsigned int rawf_start; unsigned int rawf_cnt; struct smt_data *smt; - struct mps_encap_entry *mps_encap; struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; @@ -973,6 +980,8 @@ struct adapter { struct list_head list_node; struct list_head rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ + struct list_head mps_ref; + spinlock_t mps_ref_lock; 
/* lock for syncing mps ref/def activities */ void *iscsi_ppm; @@ -1898,5 +1907,46 @@ int cxgb4_dcb_enabled(const struct net_device *dev); int cxgb4_thermal_init(struct adapter *adap); int cxgb4_thermal_remove(struct adapter *adap); +int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec, + cpumask_var_t *aff_mask, int idx); +void cxgb4_clear_msix_aff(unsigned short vec, cpumask_var_t aff_mask); + +int cxgb4_change_mac(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, + bool persistent, u8 *smt_idx); + +int cxgb4_alloc_mac_filt(struct adapter *adap, unsigned int viid, + bool free, unsigned int naddr, + const u8 **addr, u16 *idx, + u64 *hash, bool sleep_ok); +int cxgb4_free_mac_filt(struct adapter *adap, unsigned int viid, + unsigned int naddr, const u8 **addr, bool sleep_ok); +int cxgb4_init_mps_ref_entries(struct adapter *adap); +void cxgb4_free_mps_ref_entries(struct adapter *adap); +int cxgb4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid, + const u8 *addr, const u8 *mask, + unsigned int vni, unsigned int vni_mask, + u8 dip_hit, u8 lookup_type, bool sleep_ok); +int cxgb4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, + int idx, bool sleep_ok); +int cxgb4_free_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok); +int cxgb4_alloc_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok); +int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, + bool persistent, u8 *smt_idx); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 4107007b6ec4..43b0f8c57da7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -248,8 +248,9 @@ static int validate_filter(struct net_device *dev, u32 fconf, iconf; /* Check for unconfigured fields being used. */ - fconf = adapter->params.tp.vlan_pri_map; iconf = adapter->params.tp.ingress_config; + fconf = fs->hash ? 
adapter->params.tp.filter_mask : + adapter->params.tp.vlan_pri_map; if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) || unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) || @@ -726,10 +727,8 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) cxgb4_smt_release(f->smt); if (f->fs.val.encap_vld && f->fs.val.ovlan_vld) - if (atomic_dec_and_test(&adap->mps_encap[f->fs.val.ovlan & - 0x1ff].refcnt)) - t4_free_encap_mac_filt(adap, pi->viid, - f->fs.val.ovlan & 0x1ff, 0); + t4_free_encap_mac_filt(adap, pi->viid, + f->fs.val.ovlan & 0x1ff, 0); if ((f->fs.hash || is_t6(adap->params.chip)) && f->fs.type) cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); @@ -1041,7 +1040,7 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, RSS_QUEUE_V(f->fs.iq) | TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | - RX_CHANNEL_F | + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | CONG_CNTRL_V((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | @@ -1081,7 +1080,7 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, RSS_QUEUE_V(f->fs.iq) | TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | - RX_CHANNEL_F | + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | CONG_CNTRL_V((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | @@ -1176,7 +1175,6 @@ static int cxgb4_set_hash_filter(struct net_device *dev, if (ret < 0) goto free_atid; - atomic_inc(&adapter->mps_encap[ret].refcnt); f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0xffff; f->fs.val.ovlan_vld = 1; @@ -1419,7 +1417,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, if (ret < 0) goto free_clip; - atomic_inc(&adapter->mps_encap[ret].refcnt); f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0x1ff; f->fs.val.ovlan_vld = 1; @@ -1833,24 +1830,38 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) } } -int init_hash_filter(struct adapter *adap) +void init_hash_filter(struct adapter *adap) { + u32 reg; + /* On T6, verify the necessary register configs and warn the user in * case of improper config */ if (is_t6(adap->params.chip)) { - if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4) - goto err; + if (is_offload(adap)) { + if (!(t4_read_reg(adap, TP_GLOBAL_CONFIG_A) + & ACTIVEFILTERCOUNTS_F)) { + dev_err(adap->pdev_dev, "Invalid hash filter + ofld config\n"); + return; + } + } else { + reg = t4_read_reg(adap, LE_DB_RSP_CODE_0_A); + if (TCAM_ACTV_HIT_G(reg) != 4) { + dev_err(adap->pdev_dev, "Invalid hash filter config\n"); + return; + } + + reg = t4_read_reg(adap, LE_DB_RSP_CODE_1_A); + if (HASH_ACTV_HIT_G(reg) != 4) { + dev_err(adap->pdev_dev, "Invalid hash filter config\n"); + return; + } + } - if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4) - goto err; } else { dev_err(adap->pdev_dev, "Hash filter supported only on T6\n"); - return -EINVAL; + return; } + adap->params.hash_filter = 1; - return 0; -err: - dev_warn(adap->pdev_dev, "Invalid hash filter config!\n"); - return -EINVAL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index 8db5fca6dcc9..b0751c0611ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -50,7 +50,7 @@ int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); void clear_all_filters(struct adapter *adapter); -int init_hash_filter(struct adapter *adap); +void 
init_hash_filter(struct adapter *adap); bool is_filter_exact_match(struct adapter *adap, struct ch_filter_specification *fs); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 715e4edcf4a2..67202b6f352e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -366,13 +366,19 @@ static int cxgb4_mac_sync(struct net_device *netdev, const u8 *mac_addr) int ret; u64 mhash = 0; u64 uhash = 0; + /* idx stores the index of allocated filters, + * its size should be modified based on the number of + * MAC addresses that we allocate filters for + */ + + u16 idx[1] = {}; bool free = false; bool ucast = is_unicast_ether_addr(mac_addr); const u8 *maclist[1] = {mac_addr}; struct hash_mac_addr *new_entry; - ret = t4_alloc_mac_filt(adap, adap->mbox, pi->viid, free, 1, maclist, - NULL, ucast ? &uhash : &mhash, false); + ret = cxgb4_alloc_mac_filt(adap, pi->viid, free, 1, maclist, + idx, ucast ? &uhash : &mhash, false); if (ret < 0) goto out; /* if hash != 0, then add the addr to hash addr list @@ -410,7 +416,7 @@ static int cxgb4_mac_unsync(struct net_device *netdev, const u8 *mac_addr) } } - ret = t4_free_mac_filt(adap, adap->mbox, pi->viid, 1, maclist, false); + ret = cxgb4_free_mac_filt(adap, pi->viid, 1, maclist, false); return ret < 0 ? -EINVAL : 0; } @@ -449,9 +455,9 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) * Addresses are programmed to hash region, if tcam runs out of entries. * */ -static int cxgb4_change_mac(struct port_info *pi, unsigned int viid, - int *tcam_idx, const u8 *addr, bool persist, - u8 *smt_idx) +int cxgb4_change_mac(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, bool persist, + u8 *smt_idx) { struct adapter *adapter = pi->adapter; struct hash_mac_addr *entry, *new_entry; @@ -505,8 +511,8 @@ static int link_start(struct net_device *dev) ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1, !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true); if (ret == 0) - ret = cxgb4_change_mac(pi, pi->viid, &pi->xact_addr_filt, - dev->dev_addr, true, &pi->smt_idx); + ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt, + dev->dev_addr, true, &pi->smt_idx); if (ret == 0) ret = t4_link_l1cfg(pi->adapter, mb, pi->tx_chan, &pi->link_cfg); @@ -702,9 +708,38 @@ static void name_msix_vecs(struct adapter *adap) } } +int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec, + cpumask_var_t *aff_mask, int idx) +{ + int rv; + + if (!zalloc_cpumask_var(aff_mask, GFP_KERNEL)) { + dev_err(adap->pdev_dev, "alloc_cpumask_var failed\n"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(idx, dev_to_node(adap->pdev_dev)), + *aff_mask); + + rv = irq_set_affinity_hint(vec, *aff_mask); + if (rv) + dev_warn(adap->pdev_dev, + "irq_set_affinity_hint %u failed %d\n", + vec, rv); + + return 0; +} + +void cxgb4_clear_msix_aff(unsigned short vec, cpumask_var_t aff_mask) +{ + irq_set_affinity_hint(vec, NULL); + free_cpumask_var(aff_mask); +} + static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; + struct msix_info *minfo; int err, ethqidx; int msi_index = 2; @@ -714,32 +749,77 @@ static int request_msix_queue_irqs(struct adapter *adap) return err; for_each_ethrxq(s, ethqidx) { - err = request_irq(adap->msix_info[msi_index].vec, + minfo = &adap->msix_info[msi_index]; + err = request_irq(minfo->vec, t4_sge_intr_msix, 0, - 
adap->msix_info[msi_index].desc, + minfo->desc, &s->ethrxq[ethqidx].rspq); if (err) goto unwind; + + cxgb4_set_msix_aff(adap, minfo->vec, + &minfo->aff_mask, ethqidx); msi_index++; } return 0; unwind: - while (--ethqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->ethrxq[ethqidx].rspq); + while (--ethqidx >= 0) { + msi_index--; + minfo = &adap->msix_info[msi_index]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); + free_irq(minfo->vec, &s->ethrxq[ethqidx].rspq); + } free_irq(adap->msix_info[1].vec, &s->fw_evtq); return err; } static void free_msix_queue_irqs(struct adapter *adap) { - int i, msi_index = 2; struct sge *s = &adap->sge; + struct msix_info *minfo; + int i, msi_index = 2; free_irq(adap->msix_info[1].vec, &s->fw_evtq); - for_each_ethrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); + for_each_ethrxq(s, i) { + minfo = &adap->msix_info[msi_index++]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); + free_irq(minfo->vec, &s->ethrxq[i].rspq); + } +} + +static int setup_ppod_edram(struct adapter *adap) +{ + unsigned int param, val; + int ret; + + /* The driver sends a FW_PARAMS_PARAM_DEV_PPOD_EDRAM read command to + * check whether firmware supports the ppod edram feature. If firmware + * returns 1, the driver can then enable the feature by sending a + * FW_PARAMS_PARAM_DEV_PPOD_EDRAM write command with value 1. + */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PPOD_EDRAM)); + + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val); + if (ret < 0) { + dev_warn(adap->pdev_dev, + "querying PPOD_EDRAM support failed: %d\n", + ret); + return -1; + } + + if (val != 1) + return -1; + + ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val); + if (ret < 0) { + dev_err(adap->pdev_dev, + "setting PPOD_EDRAM failed: %d\n", ret); + return -1; + } + return 0; } /** @@ -1646,6 +1726,18 @@ unsigned int cxgb4_port_chan(const struct net_device *dev) } EXPORT_SYMBOL(cxgb4_port_chan); +/** + * cxgb4_port_e2cchan - get the HW c-channel of a port + * @dev: the net device for the port + * + * Return the HW RX c-channel of the given port.
+ */ +unsigned int cxgb4_port_e2cchan(const struct net_device *dev) +{ + return netdev2pinfo(dev)->rx_cchan; +} +EXPORT_SYMBOL(cxgb4_port_e2cchan); + unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo) { struct adapter *adap = netdev2adap(dev); @@ -2934,8 +3026,8 @@ static int cxgb_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ret = cxgb4_change_mac(pi, pi->viid, &pi->xact_addr_filt, - addr->sa_data, true, &pi->smt_idx); + ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt, + addr->sa_data, true, &pi->smt_idx); if (ret < 0) return ret; @@ -3043,14 +3135,14 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) } static int cxgb_setup_tc_flower(struct net_device *dev, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return cxgb4_tc_flower_replace(dev, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return cxgb4_tc_flower_destroy(dev, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return cxgb4_tc_flower_stats(dev, cls_flower); default: return -EOPNOTSUPP; @@ -3098,32 +3190,19 @@ static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int cxgb_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct port_info *pi = netdev2pinfo(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb, - pi, dev, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(cxgb_block_cb_list); static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct port_info *pi = netdev2pinfo(dev); + switch (type) { case TC_SETUP_BLOCK: - return cxgb_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &cxgb_block_cb_list, + cxgb_setup_tc_block_cb, + pi, dev, true); default: return -EOPNOTSUPP; } @@ -3187,8 +3266,6 @@ static void cxgb_del_udp_tunnel(struct net_device *netdev, i); return; } - atomic_dec(&adapter->mps_encap[adapter->rawf_start + - pi->port_id].refcnt); } } @@ -3277,7 +3354,6 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev, cxgb_del_udp_tunnel(netdev, ti); return; } - atomic_inc(&adapter->mps_encap[ret].refcnt); } } @@ -3905,14 +3981,14 @@ static int adap_init0_phy(struct adapter *adap) */ static int adap_init0_config(struct adapter *adapter, int reset) { + char *fw_config_file, fw_config_file_path[256]; + u32 finiver, finicsum, cfcsum, param, val; struct fw_caps_config_cmd caps_cmd; - const struct firmware *cf; unsigned long mtype = 0, maddr = 0; - u32 finiver, finicsum, cfcsum; - int ret; - int config_issued = 0; - char *fw_config_file, fw_config_file_path[256]; + const struct firmware *cf; char *config_name = NULL; + int config_issued = 0; + int ret; /* * Reset device if necessary. @@ -4020,6 +4096,24 @@ static int adap_init0_config(struct adapter *adapter, int reset) goto bye; } + val = 0; + + /* Ofld + Hash filter is supported. Older fw will fail this request and + * it is fine. 
+ */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD)); + ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &param, &val); + + /* FW doesn't know about Hash filter + ofld support; + * it's not a problem, so don't return an error. + */ + if (ret < 0) { + dev_warn(adapter->pdev_dev, + "Hash filter with ofld is not supported by FW\n"); + } + /* * Issue a Capability Configuration command to the firmware to get it * to parse the Configuration File. We don't use t4_fw_config_file() @@ -4096,6 +4190,13 @@ static int adap_init0_config(struct adapter *adapter, int reset) dev_err(adapter->pdev_dev, "HMA configuration failed with error %d\n", ret); + if (is_t6(adapter->params.chip)) { + ret = setup_ppod_edram(adapter); + if (!ret) + dev_info(adapter->pdev_dev, "Successfully enabled " + "ppod edram feature\n"); + } + /* * And finally tell the firmware to initialize itself using the * parameters from the Configuration File. @@ -4580,6 +4681,13 @@ static int adap_init0(struct adapter *adap) if (ret < 0) goto bye; + /* Hash filter has some mandatory register settings that must be + * checked, and that check needs to know whether offload is enabled, + * hence check and set it here. + */ + if (caps_cmd.ofldcaps) + adap->params.offload = 1; + if (caps_cmd.ofldcaps || (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) { /* query offload-related parameters */ @@ -4619,11 +4727,8 @@ static int adap_init0(struct adapter *adap) adap->params.ofldq_wr_cred = val[5]; if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) { - ret = init_hash_filter(adap); - if (ret < 0) - goto bye; + init_hash_filter(adap); } else { - adap->params.offload = 1; adap->num_ofld_uld += 1; } } @@ -4715,6 +4820,22 @@ static int adap_init0(struct adapter *adap) goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + if (is_t6(adap->params.chip)) { + params[0] = FW_PARAM_PFVF(PPOD_EDRAM_START); + params[1] = FW_PARAM_PFVF(PPOD_EDRAM_END); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, + params, val); + if (!ret) { + adap->vres.ppod_edram.start = val[0]; + adap->vres.ppod_edram.size = + val[1] - val[0] + 1; + + dev_info(adap->pdev_dev, + "ppod edram start 0x%x end 0x%x size 0x%x\n", + val[0], val[1], + adap->vres.ppod_edram.size); + } + } /* LIO target and cxgb4i initiator */ adap->num_ofld_uld += 2; } @@ -5315,7 +5436,6 @@ static void free_some_resources(struct adapter *adapter) { unsigned int i; - kvfree(adapter->mps_encap); kvfree(adapter->smt); kvfree(adapter->l2t); kvfree(adapter->srq); @@ -5841,12 +5961,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->params.offload = 0; } - adapter->mps_encap = kvcalloc(adapter->params.arch.mps_tcam_size, - sizeof(struct mps_encap_entry), - GFP_KERNEL); - if (!adapter->mps_encap) - dev_warn(&pdev->dev, "could not allocate MPS Encap entries, continuing\n"); - #if IS_ENABLED(CONFIG_IPV6) if (chip_ver <= CHELSIO_T5 && (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) { @@ -5922,6 +6036,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* check for PCI Express bandwidth capabilities */ pcie_print_link_status(pdev); + cxgb4_init_mps_ref_entries(adapter); + err = init_rss(adapter); if (err) goto out_free_dev; @@ -6048,6 +6164,8 @@ static void remove_one(struct pci_dev *pdev) disable_interrupts(adapter); + cxgb4_free_mps_ref_entries(adapter); + for_each_port(adapter, i) if
(adapter->port[i]->reg_state == NETREG_REGISTERED) unregister_netdev(adapter->port[i]); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c new file mode 100644 index 000000000000..b1a073eea60b --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Chelsio Communications, Inc. All rights reserved. */ + +#include "cxgb4.h" + +static int cxgb4_mps_ref_dec_by_mac(struct adapter *adap, + const u8 *addr, const u8 *mask) +{ + u8 bitmask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + struct mps_entries_ref *mps_entry, *tmp; + int ret = -EINVAL; + + spin_lock_bh(&adap->mps_ref_lock); + list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) { + if (ether_addr_equal(mps_entry->addr, addr) && + ether_addr_equal(mps_entry->mask, mask ? mask : bitmask)) { + if (!refcount_dec_and_test(&mps_entry->refcnt)) { + spin_unlock_bh(&adap->mps_ref_lock); + return -EBUSY; + } + list_del(&mps_entry->list); + kfree(mps_entry); + ret = 0; + break; + } + } + spin_unlock_bh(&adap->mps_ref_lock); + return ret; +} + +static int cxgb4_mps_ref_dec(struct adapter *adap, u16 idx) +{ + struct mps_entries_ref *mps_entry, *tmp; + int ret = -EINVAL; + + spin_lock(&adap->mps_ref_lock); + list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) { + if (mps_entry->idx == idx) { + if (!refcount_dec_and_test(&mps_entry->refcnt)) { + spin_unlock(&adap->mps_ref_lock); + return -EBUSY; + } + list_del(&mps_entry->list); + kfree(mps_entry); + ret = 0; + break; + } + } + spin_unlock(&adap->mps_ref_lock); + return ret; +} + +static int cxgb4_mps_ref_inc(struct adapter *adap, const u8 *mac_addr, + u16 idx, const u8 *mask) +{ + u8 bitmask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + struct mps_entries_ref *mps_entry; + int ret = 0; + + spin_lock_bh(&adap->mps_ref_lock); + list_for_each_entry(mps_entry, &adap->mps_ref, list) { + if (mps_entry->idx == idx) { + refcount_inc(&mps_entry->refcnt); + goto unlock; + } + } + mps_entry = kzalloc(sizeof(*mps_entry), GFP_ATOMIC); + if (!mps_entry) { + ret = -ENOMEM; + goto unlock; + } + ether_addr_copy(mps_entry->mask, mask ? 
mask : bitmask); + ether_addr_copy(mps_entry->addr, mac_addr); + mps_entry->idx = idx; + refcount_set(&mps_entry->refcnt, 1); + list_add_tail(&mps_entry->list, &adap->mps_ref); +unlock: + spin_unlock_bh(&adap->mps_ref_lock); + return ret; +} + +int cxgb4_free_mac_filt(struct adapter *adap, unsigned int viid, + unsigned int naddr, const u8 **addr, bool sleep_ok) +{ + int ret, i; + + for (i = 0; i < naddr; i++) { + if (!cxgb4_mps_ref_dec_by_mac(adap, addr[i], NULL)) { + ret = t4_free_mac_filt(adap, adap->mbox, viid, + 1, &addr[i], sleep_ok); + if (ret < 0) + return ret; + } + } + + /* return number of filters freed */ + return naddr; +} + +int cxgb4_alloc_mac_filt(struct adapter *adap, unsigned int viid, + bool free, unsigned int naddr, const u8 **addr, + u16 *idx, u64 *hash, bool sleep_ok) +{ + int ret, i; + + ret = t4_alloc_mac_filt(adap, adap->mbox, viid, free, + naddr, addr, idx, hash, sleep_ok); + if (ret < 0) + return ret; + + for (i = 0; i < naddr; i++) { + if (idx[i] != 0xffff) { + if (cxgb4_mps_ref_inc(adap, addr[i], idx[i], NULL)) { + ret = -ENOMEM; + goto error; + } + } + } + + goto out; +error: + cxgb4_free_mac_filt(adap, viid, naddr, addr, sleep_ok); + +out: + /* Returns a negative error number or the number of filters allocated */ + return ret; +} + +int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, + bool persistent, u8 *smt_idx) +{ + int ret; + + ret = cxgb4_change_mac(pi, viid, tcam_idx, + addr, persistent, smt_idx); + if (ret < 0) + return ret; + + cxgb4_mps_ref_inc(pi->adapter, addr, *tcam_idx, NULL); + return ret; +} + +int cxgb4_free_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok) +{ + int ret = 0; + + if (!cxgb4_mps_ref_dec(adap, idx)) + ret = t4_free_raw_mac_filt(adap, viid, addr, + mask, idx, lookup_type, + port_id, sleep_ok); + + return ret; +} + +int cxgb4_alloc_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok) +{ + int ret; + + ret = t4_alloc_raw_mac_filt(adap, viid, addr, + mask, idx, lookup_type, + port_id, sleep_ok); + if (ret < 0) + return ret; + + if (cxgb4_mps_ref_inc(adap, addr, ret, mask)) { + ret = -ENOMEM; + t4_free_raw_mac_filt(adap, viid, addr, + mask, idx, lookup_type, + port_id, sleep_ok); + } + + return ret; +} + +int cxgb4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, + int idx, bool sleep_ok) +{ + int ret = 0; + + if (!cxgb4_mps_ref_dec(adap, idx)) + ret = t4_free_encap_mac_filt(adap, viid, idx, sleep_ok); + + return ret; +} + +int cxgb4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid, + const u8 *addr, const u8 *mask, + unsigned int vni, unsigned int vni_mask, + u8 dip_hit, u8 lookup_type, bool sleep_ok) +{ + int ret; + + ret = t4_alloc_encap_mac_filt(adap, viid, addr, mask, vni, vni_mask, + dip_hit, lookup_type, sleep_ok); + if (ret < 0) + return ret; + + if (cxgb4_mps_ref_inc(adap, addr, ret, mask)) { + ret = -ENOMEM; + t4_free_encap_mac_filt(adap, viid, ret, sleep_ok); + } + return ret; +} + +int cxgb4_init_mps_ref_entries(struct adapter *adap) +{ + spin_lock_init(&adap->mps_ref_lock); + INIT_LIST_HEAD(&adap->mps_ref); + + return 0; +} + +void cxgb4_free_mps_ref_entries(struct adapter *adap) +{ + struct mps_entries_ref *mps_entry, *tmp; + + if (list_empty(&adap->mps_ref)) + return; + + spin_lock(&adap->mps_ref_lock); +
list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) { + list_del(&mps_entry->list); + kfree(mps_entry); + } + spin_unlock(&adap->mps_ref_lock); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index cfaf8f618d1f..312599c6b35a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -80,10 +80,10 @@ static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, } static void cxgb4_process_flow_match(struct net_device *dev, - struct tc_cls_flower_offload *cls, + struct flow_cls_offload *cls, struct ch_filter_specification *fs) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); u16 addr_type = 0; if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { @@ -223,9 +223,9 @@ static void cxgb4_process_flow_match(struct net_device *dev, } static int cxgb4_validate_flow_match(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; u16 ethtype_mask = 0; u16 ethtype_key = 0; @@ -378,10 +378,10 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, } static void cxgb4_process_flow_actions(struct net_device *in, - struct tc_cls_flower_offload *cls, + struct flow_cls_offload *cls, struct ch_filter_specification *fs) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_action_entry *act; int i; @@ -544,9 +544,9 @@ static bool valid_pedit_action(struct net_device *dev, } static int cxgb4_validate_flow_actions(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_action_entry *act; bool act_redir = false; bool act_pedit = false; @@ -633,7 +633,7 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, } int cxgb4_tc_flower_replace(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_entry *ch_flower; @@ -709,7 +709,7 @@ free_entry: } int cxgb4_tc_flower_destroy(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_entry *ch_flower; @@ -783,7 +783,7 @@ static void ch_flower_stats_cb(struct timer_list *t) } int cxgb4_tc_flower_stats(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_stats *ofld_stats; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 050c8a50ae41..eb4c95248baf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -109,11 +109,11 @@ struct ch_tc_pedit_fields { #define PEDIT_UDP_SPORT_DPORT 0x0 int cxgb4_tc_flower_replace(struct net_device *dev, - struct tc_cls_flower_offload *cls); + struct flow_cls_offload *cls); int cxgb4_tc_flower_destroy(struct net_device *dev, - struct 
tc_cls_flower_offload *cls); + struct flow_cls_offload *cls); int cxgb4_tc_flower_stats(struct net_device *dev, - struct tc_cls_flower_offload *cls); + struct flow_cls_offload *cls); int cxgb4_init_tc_flower(struct adapter *adap); void cxgb4_cleanup_tc_flower(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 6c685b920713..5b602243d573 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -352,25 +352,32 @@ static int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + struct uld_msix_info *minfo; int err = 0; unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; - err = request_irq(adap->msix_info_ulds[bmap_idx].vec, + minfo = &adap->msix_info_ulds[bmap_idx]; + err = request_irq(minfo->vec, t4_sge_intr_msix, 0, - adap->msix_info_ulds[bmap_idx].desc, + minfo->desc, &rxq_info->uldrxq[idx].rspq); if (err) goto unwind; + + cxgb4_set_msix_aff(adap, minfo->vec, + &minfo->aff_mask, idx); } return 0; + unwind: while (idx-- > 0) { bmap_idx = rxq_info->msix_tbl[idx]; + minfo = &adap->msix_info_ulds[bmap_idx]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); free_msix_idx_in_bmap(adap, bmap_idx); - free_irq(adap->msix_info_ulds[bmap_idx].vec, - &rxq_info->uldrxq[idx].rspq); + free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq); } return err; } @@ -379,14 +386,16 @@ static void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + struct uld_msix_info *minfo; unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; + minfo = &adap->msix_info_ulds[bmap_idx]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); free_msix_idx_in_bmap(adap, bmap_idx); - free_irq(adap->msix_info_ulds[bmap_idx].vec, - &rxq_info->uldrxq[idx].rspq); + free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 21da34a4ca24..cee582e36134 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -292,6 +292,7 @@ struct cxgb4_virt_res { /* virtualized HW resources */ struct cxgb4_range ocq; struct cxgb4_range key; unsigned int ncrypto_fc; + struct cxgb4_range ppod_edram; }; struct chcr_stats_debug { @@ -393,6 +394,7 @@ int cxgb4_immdata_send(struct net_device *dev, unsigned int idx, int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); +unsigned int cxgb4_port_e2cchan(const struct net_device *dev); unsigned int cxgb4_port_viid(const struct net_device *dev); unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid); unsigned int cxgb4_port_idx(const struct net_device *dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 93feb258067b..9dd5ed9a2965 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -6209,6 +6209,37 @@ unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx) } /** + * t4_get_tp_e2c_map - return the E2C channel map associated with a port + * @adapter: the 
adapter + * @pidx: the port index + */ +static unsigned int t4_get_tp_e2c_map(struct adapter *adapter, int pidx) +{ + unsigned int nports; + u32 param, val = 0; + int ret; + + nports = 1 << NUMPORTS_G(t4_read_reg(adapter, MPS_CMN_CTL_A)); + if (pidx >= nports) { + CH_WARN(adapter, "TP E2C Channel Port Index %d >= Nports %d\n", + pidx, nports); + return 0; + } + + /* FW version >= 1.16.44.0 can determine E2C channel map using + * FW_PARAMS_PARAM_DEV_TPCHMAP API. + */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_TPCHMAP)); + ret = t4_query_params_ns(adapter, adapter->mbox, adapter->pf, + 0, 1, &param, &val); + if (!ret) + return (val >> (8 * pidx)) & 0xff; + + return 0; +} + +/** * t4_get_tp_ch_map - return TP ingress channels associated with a port * @adapter: the adapter * @pidx: the port index @@ -9368,8 +9399,9 @@ int t4_init_sge_params(struct adapter *adapter) */ int t4_init_tp_params(struct adapter *adap, bool sleep_ok) { - int chan; - u32 v; + u32 param, val, v; + int chan, ret; + v = t4_read_reg(adap, TP_TIMER_RESOLUTION_A); adap->params.tp.tre = TIMERRESOLUTION_G(v); @@ -9379,11 +9411,47 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok) for (chan = 0; chan < NCHAN; chan++) adap->params.tp.tx_modq[chan] = chan; - /* Cache the adapter's Compressed Filter Mode and global Incress + /* Cache the adapter's Compressed Filter Mode/Mask and global Ingress * Configuration. */ - t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1, - TP_VLAN_PRI_MAP_A, sleep_ok); + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FILTER) | + FW_PARAMS_PARAM_Y_V(FW_PARAM_DEV_FILTER_MODE_MASK)); + + /* Read current value */ + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, + &param, &val); + if (ret == 0) { + dev_info(adap->pdev_dev, + "Current filter mode/mask 0x%x:0x%x\n", + FW_PARAMS_PARAM_FILTER_MODE_G(val), + FW_PARAMS_PARAM_FILTER_MASK_G(val)); + adap->params.tp.vlan_pri_map = + FW_PARAMS_PARAM_FILTER_MODE_G(val); + adap->params.tp.filter_mask = + FW_PARAMS_PARAM_FILTER_MASK_G(val); + } else { + dev_info(adap->pdev_dev, + "Failed to read filter mode/mask via fw api, using indirect-reg-read\n"); + + /* In case of an older-fw (which doesn't expose the + * FW_PARAM_DEV_FILTER_MODE_MASK api) and newer-driver (which + * uses the fw api) combination, fall back to the older method + * of reading the filter mode from the indirect register. + */ + t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1, + TP_VLAN_PRI_MAP_A, sleep_ok); + + /* With the older-fw and newer-driver combination we might run + * into an issue when the user wants to use the hash filter + * region but the filter_mask is zero; in this case filter_mask + * validation is tough. To avoid that, we set the filter_mask to + * the same value as the filter mode, which behaves exactly like + * the older way of ignoring the filter mask validation. + */ + adap->params.tp.filter_mask = adap->params.tp.vlan_pri_map; + } + t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1, TP_INGRESS_CONFIG_A, sleep_ok); @@ -9594,6 +9662,7 @@ int t4_init_portinfo(struct port_info *pi, int mbox, pi->tx_chan = port; pi->lport = port; pi->rss_size = rss_size; + pi->rx_cchan = t4_get_tp_e2c_map(pi->adapter, port); /* If fw supports returning the VIN as part of FW_VI_CMD, * save the returned values.
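
For readers tracing the packed firmware replies above: FW_PARAMS_PARAM_DEV_TPCHMAP returns one 8-bit c-channel per port in a single 32-bit word, and FW_PARAM_DEV_FILTER_MODE_MASK returns the 16-bit filter mode and 16-bit filter mask in one word (see the FW_PARAMS_PARAM_FILTER_* accessors added to t4fw_api.h below). A minimal standalone sketch of the decoding, assuming only the layouts shown in this patch; the helper names and sample values are illustrative, not part of the driver:

#include <stdint.h>
#include <stdio.h>

/* One byte per port, port 0 in bits 7:0 -- the same math as
 * t4_get_tp_e2c_map() above.
 */
static unsigned int tpchmap_port_cchan(uint32_t chmap, int pidx)
{
	return (chmap >> (8 * pidx)) & 0xff;
}

/* Mode in bits 31:16, mask in bits 15:0 -- mirrors the
 * FW_PARAMS_PARAM_FILTER_MODE_G/FW_PARAMS_PARAM_FILTER_MASK_G macros.
 */
static uint16_t filter_word_mode(uint32_t val)
{
	return (val >> 16) & 0xffff;
}

static uint16_t filter_word_mask(uint32_t val)
{
	return val & 0xffff;
}

int main(void)
{
	uint32_t chmap = 0x03020100;		/* hypothetical: port N -> channel N */
	uint32_t mode_mask = 0x12340fff;	/* hypothetical fw reply */

	printf("port 2 cchan: %u\n", tpchmap_port_cchan(chmap, 2));
	printf("mode 0x%x mask 0x%x\n", filter_word_mode(mode_mask),
	       filter_word_mask(mode_mask));
	return 0;
}
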
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index eb222d40ddbf..a957a6e4d4c4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1334,6 +1334,10 @@ #define TP_OUT_CONFIG_A 0x7d04 #define TP_GLOBAL_CONFIG_A 0x7d08 +#define ACTIVEFILTERCOUNTS_S 22 +#define ACTIVEFILTERCOUNTS_V(x) ((x) << ACTIVEFILTERCOUNTS_S) +#define ACTIVEFILTERCOUNTS_F ACTIVEFILTERCOUNTS_V(1U) + #define TP_CMM_TCB_BASE_A 0x7d10 #define TP_CMM_MM_BASE_A 0x7d14 #define TP_CMM_TIMER_BASE_A 0x7d18 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index b2a618e72fcf..65313f6b5704 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1221,6 +1221,23 @@ enum fw_params_mnem { /* * device parameters */ + +#define FW_PARAMS_PARAM_FILTER_MODE_S 16 +#define FW_PARAMS_PARAM_FILTER_MODE_M 0xffff +#define FW_PARAMS_PARAM_FILTER_MODE_V(x) \ + ((x) << FW_PARAMS_PARAM_FILTER_MODE_S) +#define FW_PARAMS_PARAM_FILTER_MODE_G(x) \ + (((x) >> FW_PARAMS_PARAM_FILTER_MODE_S) & \ + FW_PARAMS_PARAM_FILTER_MODE_M) + +#define FW_PARAMS_PARAM_FILTER_MASK_S 0 +#define FW_PARAMS_PARAM_FILTER_MASK_M 0xffff +#define FW_PARAMS_PARAM_FILTER_MASK_V(x) \ + ((x) << FW_PARAMS_PARAM_FILTER_MASK_S) +#define FW_PARAMS_PARAM_FILTER_MASK_G(x) \ + (((x) >> FW_PARAMS_PARAM_FILTER_MASK_S) & \ + FW_PARAMS_PARAM_FILTER_MASK_M) + enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_CCLK = 0x00, /* chip core clock in khz */ FW_PARAMS_PARAM_DEV_PORTVEC = 0x01, /* the port vector */ @@ -1250,12 +1267,16 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D, FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E, + FW_PARAMS_PARAM_DEV_TPCHMAP = 0x1F, FW_PARAMS_PARAM_DEV_HMA_SIZE = 0x20, FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, + FW_PARAMS_PARAM_DEV_PPOD_EDRAM = 0x23, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, + FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28, FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A, + FW_PARAMS_PARAM_DEV_FILTER = 0x2E, }; /* @@ -1312,6 +1333,8 @@ enum fw_params_param_pfvf { FW_PARAMS_PARAM_PFVF_RAWF_END = 0x37, FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39, FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A, + FW_PARAMS_PARAM_PFVF_PPOD_EDRAM_START = 0x3B, + FW_PARAMS_PARAM_PFVF_PPOD_EDRAM_END = 0x3C, FW_PARAMS_PARAM_PFVF_LINK_STATE = 0x40, }; @@ -1347,6 +1370,11 @@ enum fw_params_param_dev_diag { FW_PARAM_DEV_DIAG_MAXTMPTHRESH = 0x02, }; +enum fw_params_param_dev_filter { + FW_PARAM_DEV_FILTER_VNIC_MODE = 0x00, + FW_PARAM_DEV_FILTER_MODE_MASK = 0x01, +}; + enum fw_params_param_dev_fwcache { FW_PARAM_DEV_FWCACHE_FLUSH = 0x00, FW_PARAM_DEV_FWCACHE_FLUSHINV = 0x01, diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c index e2919005ead3..21034536c9c5 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c @@ -123,6 +123,9 @@ static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count, unsigned int cpu; int i; + if (!ppm->pool) + return -EINVAL; + cpu = get_cpu(); pool = per_cpu_ptr(ppm->pool, cpu); spin_lock_bh(&pool->lock); @@ -169,7 +172,9 @@ static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count, } ppm->next = i + count; - if (ppm->next >= 
ppm->bmap_index_max) + if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram)) + ppm->next = 0; + else if (ppm->next >= ppm->bmap_index_max) ppm->next = 0; spin_unlock_bh(&ppm->map_lock); @@ -382,18 +387,36 @@ static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, struct pci_dev *pdev, void *lldev, - struct cxgbi_tag_format *tformat, - unsigned int ppmax, - unsigned int llimit, - unsigned int start, - unsigned int reserve_factor) + struct cxgbi_tag_format *tformat, unsigned int iscsi_size, + unsigned int llimit, unsigned int start, + unsigned int reserve_factor, unsigned int iscsi_edram_start, + unsigned int iscsi_edram_size) { struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp); struct cxgbi_ppm_pool *pool = NULL; - unsigned int ppmax_pool = 0; unsigned int pool_index_max = 0; - unsigned int alloc_sz; + unsigned int ppmax_pool = 0; unsigned int ppod_bmap_size; + unsigned int alloc_sz; + unsigned int ppmax; + + if (!iscsi_edram_start) + iscsi_edram_size = 0; + + if (iscsi_edram_size && + ((iscsi_edram_start + iscsi_edram_size) != start)) { + pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x " + "size 0x%x DDR start 0x%x\n", + iscsi_edram_start, iscsi_edram_size, start); + return -EINVAL; + } + + if (iscsi_edram_size) { + reserve_factor = 0; + start = iscsi_edram_start; + } + + ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT; if (ppm) { pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", @@ -434,6 +457,14 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, __func__, ppmax, ppmax_pool, ppod_bmap_size, start, end); } + if (iscsi_edram_size) { + unsigned int first_ddr_idx = + iscsi_edram_size >> PPOD_SIZE_SHIFT; + + ppm->max_index_in_edram = first_ddr_idx - 1; + bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1); + pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx); + } spin_lock_init(&ppm->map_lock); kref_init(&ppm->refcnt); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h index a91ad766cef0..7b02c200dd1e 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h @@ -143,6 +143,7 @@ struct cxgbi_ppm { spinlock_t map_lock; /* ppm map lock */ unsigned int bmap_index_max; unsigned int next; + unsigned int max_index_in_edram; unsigned long *ppod_bmap; struct cxgbi_ppod_data ppod_data[0]; }; @@ -324,9 +325,9 @@ int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *, unsigned short nr_pages, unsigned long caller_data); int cxgbi_ppm_init(void **ppm_pp, struct net_device *, struct pci_dev *, void *lldev, struct cxgbi_tag_format *, - unsigned int ppmax, unsigned int llimit, - unsigned int start, - unsigned int reserve_factor); + unsigned int iscsi_size, unsigned int llimit, + unsigned int start, unsigned int reserve_factor, + unsigned int edram_start, unsigned int edram_size); int cxgbi_ppm_release(struct cxgbi_ppm *ppm); void cxgbi_tagmask_check(unsigned int tagmask, struct cxgbi_tag_format *); unsigned int cxgbi_tagmask_set(unsigned int ppmax); diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index 8bd384720f80..fbef2829f3de 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -10,8 +10,7 @@ config FSL_DPAA2_ETH config FSL_DPAA2_PTP_CLOCK tristate "Freescale DPAA2 PTP Clock" - depends on FSL_DPAA2_ETH - imply PTP_1588_CLOCK + depends 
on FSL_DPAA2_ETH && PTP_1588_CLOCK_QORIQ default y help This driver adds support for using the DPAA2 1588 timer module diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 7d2390e3df77..0acb11557ed1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -555,7 +555,7 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv, /* Prepare the HW SGT structure */ sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry) * num_dma_bufs; - sgt_buf = netdev_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN); + sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN); if (unlikely(!sgt_buf)) { err = -ENOMEM; goto sgt_buf_alloc_failed; @@ -757,6 +757,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) u16 queue_mapping; unsigned int needed_headroom; u32 fd_len; + u8 prio = 0; int err, i; percpu_stats = this_cpu_ptr(priv->percpu_stats); @@ -814,6 +815,18 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) * a queue affined to the same core that processed the Rx frame */ queue_mapping = skb_get_queue_mapping(skb); + + if (net_dev->num_tc) { + prio = netdev_txq_to_tc(net_dev, queue_mapping); + /* Hardware interprets priority level 0 as being the highest, + * so we need to do a reverse mapping to the netdev tc index + */ + prio = net_dev->num_tc - prio - 1; + /* We have only one FQ array entry for all Tx hardware queues + * with the same flow id (but different priority levels) + */ + queue_mapping %= dpaa2_eth_queue_count(priv); + } fq = &priv->fq[queue_mapping]; fd_len = dpaa2_fd_get_len(&fd); @@ -824,7 +837,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) * the Tx confirmation callback for this frame */ for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = priv->enqueue(priv, fq, &fd, 0); + err = priv->enqueue(priv, fq, &fd, prio); if (err != -EBUSY) break; } @@ -997,13 +1010,6 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) int i, j; int new_count; - /* This is the lazy seeding of Rx buffer pools. - * dpaa2_add_bufs() is also used on the Rx hotpath and calls - * napi_alloc_frag(). The trouble with that is that it in turn ends up - * calling this_cpu_ptr(), which mandates execution in atomic context. - * Rather than splitting up the code, do a one-off preempt disable. - */ - preempt_disable(); for (j = 0; j < priv->num_channels; j++) { for (i = 0; i < DPAA2_ETH_NUM_BUFS; i += DPAA2_ETH_BUFS_PER_CMD) { @@ -1011,12 +1017,10 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) priv->channel[j]->buf_count += new_count; if (new_count < DPAA2_ETH_BUFS_PER_CMD) { - preempt_enable(); return -ENOMEM; } } } - preempt_enable(); return 0; } @@ -1872,6 +1876,78 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n, return n - drops; } +static int update_xps(struct dpaa2_eth_priv *priv) +{ + struct net_device *net_dev = priv->net_dev; + struct cpumask xps_mask; + struct dpaa2_eth_fq *fq; + int i, num_queues, netdev_queues; + int err = 0; + + num_queues = dpaa2_eth_queue_count(priv); + netdev_queues = (net_dev->num_tc ? 
: 1) * num_queues; + + /* The first <num_queues> entries in priv->fq array are Tx/Tx conf + * queues, so only process those + */ + for (i = 0; i < netdev_queues; i++) { + fq = &priv->fq[i % num_queues]; + + cpumask_clear(&xps_mask); + cpumask_set_cpu(fq->target_cpu, &xps_mask); + + err = netif_set_xps_queue(net_dev, &xps_mask, i); + if (err) { + netdev_warn_once(net_dev, "Error setting XPS queue\n"); + break; + } + } + + return err; +} + +static int dpaa2_eth_setup_tc(struct net_device *net_dev, + enum tc_setup_type type, void *type_data) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + struct tc_mqprio_qopt *mqprio = type_data; + u8 num_tc, num_queues; + int i; + + if (type != TC_SETUP_QDISC_MQPRIO) + return -EINVAL; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + num_queues = dpaa2_eth_queue_count(priv); + num_tc = mqprio->num_tc; + + if (num_tc == net_dev->num_tc) + return 0; + + if (num_tc > dpaa2_eth_tc_count(priv)) { + netdev_err(net_dev, "Max %d traffic classes supported\n", + dpaa2_eth_tc_count(priv)); + return -EINVAL; + } + + if (!num_tc) { + netdev_reset_tc(net_dev); + netif_set_real_num_tx_queues(net_dev, num_queues); + goto out; + } + + netdev_set_num_tc(net_dev, num_tc); + netif_set_real_num_tx_queues(net_dev, num_tc * num_queues); + + for (i = 0; i < num_tc; i++) + netdev_set_tc_queue(net_dev, i, num_queues, i * num_queues); + +out: + update_xps(priv); + + return 0; +} + static const struct net_device_ops dpaa2_eth_ops = { .ndo_open = dpaa2_eth_open, .ndo_start_xmit = dpaa2_eth_tx, @@ -1884,6 +1960,7 @@ static const struct net_device_ops dpaa2_eth_ops = { .ndo_change_mtu = dpaa2_eth_change_mtu, .ndo_bpf = dpaa2_eth_xdp, .ndo_xdp_xmit = dpaa2_eth_xdp_xmit, + .ndo_setup_tc = dpaa2_eth_setup_tc, }; static void cdan_cb(struct dpaa2_io_notification_ctx *ctx) @@ -2138,10 +2215,9 @@ static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv, static void set_fq_affinity(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; - struct cpumask xps_mask; struct dpaa2_eth_fq *fq; int rx_cpu, txc_cpu; - int i, err; + int i; /* For each FQ, pick one channel/CPU to deliver frames to. 
* This may well change at runtime, either through irqbalance or @@ -2160,17 +2236,6 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv) break; case DPAA2_TX_CONF_FQ: fq->target_cpu = txc_cpu; - - /* Tell the stack to affine to txc_cpu the Tx queue - * associated with the confirmation one - */ - cpumask_clear(&xps_mask); - cpumask_set_cpu(txc_cpu, &xps_mask); - err = netif_set_xps_queue(priv->net_dev, &xps_mask, - fq->flowid); - if (err) - dev_err(dev, "Error setting XPS queue\n"); - txc_cpu = cpumask_next(txc_cpu, &priv->dpio_cpumask); if (txc_cpu >= nr_cpu_ids) txc_cpu = cpumask_first(&priv->dpio_cpumask); @@ -2180,6 +2245,8 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv) } fq->channel = get_affine_channel(priv, fq->target_cpu); } + + update_xps(priv); } static void setup_fqs(struct dpaa2_eth_priv *priv) @@ -2361,11 +2428,10 @@ static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv, static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv, struct dpaa2_eth_fq *fq, - struct dpaa2_fd *fd, - u8 prio __always_unused) + struct dpaa2_fd *fd, u8 prio) { return dpaa2_io_service_enqueue_fq(fq->channel->dpio, - fq->tx_fqid, fd); + fq->tx_fqid[prio], fd); } static void set_enqueue_mode(struct dpaa2_eth_priv *priv) @@ -2479,14 +2545,9 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv, queue.destination.type = DPNI_DEST_DPCON; queue.destination.priority = 1; queue.user_context = (u64)(uintptr_t)fq; - queue.flc.stash_control = 1; - queue.flc.value &= 0xFFFFFFFFFFFFFFC0; - /* 01 01 00 - data, annotation, flow context */ - queue.flc.value |= 0x14; err = dpni_set_queue(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_RX, 0, fq->flowid, - DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST | - DPNI_QUEUE_OPT_FLC, + DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST, &queue); if (err) { dev_err(dev, "dpni_set_queue(RX) failed\n"); @@ -2526,17 +2587,21 @@ static int setup_tx_flow(struct dpaa2_eth_priv *priv, struct device *dev = priv->net_dev->dev.parent; struct dpni_queue queue; struct dpni_queue_id qid; - int err; + int i, err; - err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, - DPNI_QUEUE_TX, 0, fq->flowid, &queue, &qid); - if (err) { - dev_err(dev, "dpni_get_queue(TX) failed\n"); - return err; + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { + err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, + DPNI_QUEUE_TX, i, fq->flowid, + &queue, &qid); + if (err) { + dev_err(dev, "dpni_get_queue(TX) failed\n"); + return err; + } + fq->tx_fqid[i] = qid.fqid; } + /* All Tx queues belonging to the same flowid have the same qdbin */ fq->tx_qdbin = qid.qdbin; - fq->tx_fqid = qid.fqid; err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX_CONFIRM, 0, fq->flowid, @@ -3236,7 +3301,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) dev = &dpni_dev->dev; /* Net device */ - net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA2_ETH_MAX_TX_QUEUES); + net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA2_ETH_MAX_NETDEV_QUEUES); if (!net_dev) { dev_err(dev, "alloc_etherdev_mq() failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index e180d5a68c98..9af18c24221f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -282,10 +282,13 @@ struct dpaa2_eth_ch_stats { }; /* Maximum number of queues associated with a DPNI */ +#define DPAA2_ETH_MAX_TCS 8 #define DPAA2_ETH_MAX_RX_QUEUES 16 #define DPAA2_ETH_MAX_TX_QUEUES 16 #define 
DPAA2_ETH_MAX_QUEUES (DPAA2_ETH_MAX_RX_QUEUES + \ DPAA2_ETH_MAX_TX_QUEUES) +#define DPAA2_ETH_MAX_NETDEV_QUEUES \ + (DPAA2_ETH_MAX_TX_QUEUES * DPAA2_ETH_MAX_TCS) #define DPAA2_ETH_MAX_DPCONS 16 @@ -299,8 +302,9 @@ struct dpaa2_eth_priv; struct dpaa2_eth_fq { u32 fqid; u32 tx_qdbin; - u32 tx_fqid; + u32 tx_fqid[DPAA2_ETH_MAX_TCS]; u16 flowid; + u8 tc; int target_cpu; u32 dq_frames; u32 dq_bytes; @@ -448,6 +452,9 @@ static inline int dpaa2_eth_cmp_dpni_ver(struct dpaa2_eth_priv *priv, #define dpaa2_eth_fs_count(priv) \ ((priv)->dpni_attrs.fs_entries) +#define dpaa2_eth_tc_count(priv) \ + ((priv)->dpni_attrs.num_tcs) + /* We have exactly one {Rx, Tx conf} queue per channel */ #define dpaa2_eth_queue_count(priv) \ ((priv)->num_channels) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 9b150db3b510..a9503aea527f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -5,114 +5,58 @@ */ #include <linux/module.h> -#include <linux/slab.h> -#include <linux/ptp_clock_kernel.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/msi.h> #include <linux/fsl/mc.h> +#include <linux/fsl/ptp_qoriq.h> #include "dpaa2-ptp.h" -struct ptp_dpaa2_priv { - struct fsl_mc_device *ptp_mc_dev; - struct ptp_clock *clock; - struct ptp_clock_info caps; - u32 freq_comp; -}; - -/* PTP clock operations */ -static int ptp_dpaa2_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int dpaa2_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - u64 adj; - u32 diff, tmr_add; - int neg_adj = 0; - int err = 0; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - - tmr_add = ptp_dpaa2->freq_comp; - adj = tmr_add; - adj *= ppb; - diff = div_u64(adj, 1000000000ULL); - - tmr_add = neg_adj ? 
tmr_add - diff : tmr_add + diff; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct fsl_mc_device *mc_dev; + struct device *dev; + u32 mask = 0; + u32 bit; + int err; - err = dprtc_set_freq_compensation(mc_dev->mc_io, 0, - mc_dev->mc_handle, tmr_add); - if (err) - dev_err(dev, "dprtc_set_freq_compensation err %d\n", err); - return err; -} + dev = ptp_qoriq->dev; + mc_dev = to_fsl_mc_device(dev); -static int ptp_dpaa2_adjtime(struct ptp_clock_info *ptp, s64 delta) -{ - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - s64 now; - int err = 0; + switch (rq->type) { + case PTP_CLK_REQ_PPS: + bit = DPRTC_EVENT_PPS; + break; + default: + return -EOPNOTSUPP; + } - err = dprtc_get_time(mc_dev->mc_io, 0, mc_dev->mc_handle, &now); - if (err) { - dev_err(dev, "dprtc_get_time err %d\n", err); + err = dprtc_get_irq_mask(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, &mask); + if (err < 0) { + dev_err(dev, "dprtc_get_irq_mask(): %d\n", err); return err; } - now += delta; + if (on) + mask |= bit; + else + mask &= ~bit; - err = dprtc_set_time(mc_dev->mc_io, 0, mc_dev->mc_handle, now); - if (err) - dev_err(dev, "dprtc_set_time err %d\n", err); - return err; -} - -static int ptp_dpaa2_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) -{ - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - u64 ns; - u32 remainder; - int err = 0; - - err = dprtc_get_time(mc_dev->mc_io, 0, mc_dev->mc_handle, &ns); - if (err) { - dev_err(dev, "dprtc_get_time err %d\n", err); + err = dprtc_set_irq_mask(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, mask); + if (err < 0) { + dev_err(dev, "dprtc_set_irq_mask(): %d\n", err); return err; } - ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); - ts->tv_nsec = remainder; - return err; -} - -static int ptp_dpaa2_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) -{ - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - u64 ns; - int err = 0; - - ns = ts->tv_sec * 1000000000ULL; - ns += ts->tv_nsec; - - err = dprtc_set_time(mc_dev->mc_io, 0, mc_dev->mc_handle, ns); - if (err) - dev_err(dev, "dprtc_set_time err %d\n", err); - return err; + return 0; } -static const struct ptp_clock_info ptp_dpaa2_caps = { +static const struct ptp_clock_info dpaa2_ptp_caps = { .owner = THIS_MODULE, .name = "DPAA2 PTP Clock", .max_adj = 512000, @@ -121,21 +65,58 @@ static const struct ptp_clock_info ptp_dpaa2_caps = { .n_per_out = 3, .n_pins = 0, .pps = 1, - .adjfreq = ptp_dpaa2_adjfreq, - .adjtime = ptp_dpaa2_adjtime, - .gettime64 = ptp_dpaa2_gettime, - .settime64 = ptp_dpaa2_settime, + .adjfine = ptp_qoriq_adjfine, + .adjtime = ptp_qoriq_adjtime, + .gettime64 = ptp_qoriq_gettime, + .settime64 = ptp_qoriq_settime, + .enable = dpaa2_ptp_enable, }; +static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv) +{ + struct ptp_qoriq *ptp_qoriq = priv; + struct ptp_clock_event event; + struct fsl_mc_device *mc_dev; + struct device *dev; + u32 status = 0; + int err; + + dev = ptp_qoriq->dev; + mc_dev = to_fsl_mc_device(dev); + + err = dprtc_get_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, &status); + if (unlikely(err)) { + 
dev_err(dev, "dprtc_get_irq_status err %d\n", err); + return IRQ_NONE; + } + + if (status & DPRTC_EVENT_PPS) { + event.type = PTP_CLOCK_PPS; + ptp_clock_event(ptp_qoriq->clock, &event); + } + + err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, status); + if (unlikely(err)) { + dev_err(dev, "dprtc_clear_irq_status err %d\n", err); + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) { struct device *dev = &mc_dev->dev; - struct ptp_dpaa2_priv *ptp_dpaa2; - u32 tmr_add = 0; + struct fsl_mc_device_irq *irq; + struct ptp_qoriq *ptp_qoriq; + struct device_node *node; + void __iomem *base; int err; - ptp_dpaa2 = devm_kzalloc(dev, sizeof(*ptp_dpaa2), GFP_KERNEL); - if (!ptp_dpaa2) + ptp_qoriq = devm_kzalloc(dev, sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) return -ENOMEM; err = fsl_mc_portal_allocate(mc_dev, 0, &mc_dev->mc_io); @@ -154,30 +135,60 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) goto err_free_mcp; } - ptp_dpaa2->ptp_mc_dev = mc_dev; + ptp_qoriq->dev = dev; - err = dprtc_get_freq_compensation(mc_dev->mc_io, 0, - mc_dev->mc_handle, &tmr_add); - if (err) { - dev_err(dev, "dprtc_get_freq_compensation err %d\n", err); + node = of_find_compatible_node(NULL, NULL, "fsl,dpaa2-ptp"); + if (!node) { + err = -ENODEV; goto err_close; } - ptp_dpaa2->freq_comp = tmr_add; - ptp_dpaa2->caps = ptp_dpaa2_caps; + dev->of_node = node; - ptp_dpaa2->clock = ptp_clock_register(&ptp_dpaa2->caps, dev); - if (IS_ERR(ptp_dpaa2->clock)) { - err = PTR_ERR(ptp_dpaa2->clock); + base = of_iomap(node, 0); + if (!base) { + err = -ENOMEM; goto err_close; } - dpaa2_phc_index = ptp_clock_index(ptp_dpaa2->clock); + err = fsl_mc_allocate_irqs(mc_dev); + if (err) { + dev_err(dev, "MC irqs allocation failed\n"); + goto err_unmap; + } + + irq = mc_dev->irqs[0]; + ptp_qoriq->irq = irq->msi_desc->irq; - dev_set_drvdata(dev, ptp_dpaa2); + err = devm_request_threaded_irq(dev, ptp_qoriq->irq, NULL, + dpaa2_ptp_irq_handler_thread, + IRQF_NO_SUSPEND | IRQF_ONESHOT, + dev_name(dev), ptp_qoriq); + if (err < 0) { + dev_err(dev, "devm_request_threaded_irq(): %d\n", err); + goto err_free_mc_irq; + } + + err = dprtc_set_irq_enable(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, 1); + if (err < 0) { + dev_err(dev, "dprtc_set_irq_enable(): %d\n", err); + goto err_free_mc_irq; + } + + err = ptp_qoriq_init(ptp_qoriq, base, &dpaa2_ptp_caps); + if (err) + goto err_free_mc_irq; + + dpaa2_phc_index = ptp_qoriq->phc_index; + dev_set_drvdata(dev, ptp_qoriq); return 0; +err_free_mc_irq: + fsl_mc_free_irqs(mc_dev); +err_unmap: + iounmap(base); err_close: dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); err_free_mcp: @@ -188,12 +199,15 @@ err_exit: static int dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) { - struct ptp_dpaa2_priv *ptp_dpaa2; struct device *dev = &mc_dev->dev; + struct ptp_qoriq *ptp_qoriq; + + ptp_qoriq = dev_get_drvdata(dev); - ptp_dpaa2 = dev_get_drvdata(dev); - ptp_clock_unregister(ptp_dpaa2->clock); + dpaa2_phc_index = -1; + ptp_qoriq_free(ptp_qoriq); + fsl_mc_free_irqs(mc_dev); dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); fsl_mc_portal_free(mc_dev->mc_io); diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h index 9af4ac71f347..720cd50f5895 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h @@ -17,22 +17,54 @@ #define DPRTC_CMDID_CLOSE DPRTC_CMD(0x800) #define DPRTC_CMDID_OPEN 
DPRTC_CMD(0x810)

-#define DPRTC_CMDID_SET_FREQ_COMPENSATION	DPRTC_CMD(0x1d1)
-#define DPRTC_CMDID_GET_FREQ_COMPENSATION	DPRTC_CMD(0x1d2)
-#define DPRTC_CMDID_GET_TIME			DPRTC_CMD(0x1d3)
-#define DPRTC_CMDID_SET_TIME			DPRTC_CMD(0x1d4)
+#define DPRTC_CMDID_SET_IRQ_ENABLE	DPRTC_CMD(0x012)
+#define DPRTC_CMDID_GET_IRQ_ENABLE	DPRTC_CMD(0x013)
+#define DPRTC_CMDID_SET_IRQ_MASK	DPRTC_CMD(0x014)
+#define DPRTC_CMDID_GET_IRQ_MASK	DPRTC_CMD(0x015)
+#define DPRTC_CMDID_GET_IRQ_STATUS	DPRTC_CMD(0x016)
+#define DPRTC_CMDID_CLEAR_IRQ_STATUS	DPRTC_CMD(0x017)
 
 #pragma pack(push, 1)
 struct dprtc_cmd_open {
 	__le32 dprtc_id;
 };
 
-struct dprtc_get_freq_compensation {
-	__le32 freq_compensation;
+struct dprtc_cmd_get_irq {
+	__le32 pad;
+	u8 irq_index;
 };
 
-struct dprtc_time {
-	__le64 time;
+struct dprtc_cmd_set_irq_enable {
+	u8 en;
+	u8 pad[3];
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_enable {
+	u8 en;
+};
+
+struct dprtc_cmd_set_irq_mask {
+	__le32 mask;
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_mask {
+	__le32 mask;
+};
+
+struct dprtc_cmd_get_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_status {
+	__le32 status;
+};
+
+struct dprtc_cmd_clear_irq_status {
+	__le32 status;
+	u8 irq_index;
 };
 #pragma pack(pop)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.c b/drivers/net/ethernet/freescale/dpaa2/dprtc.c
index c13e09bc7b9d..ed52a34fa6a1 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.c
@@ -74,121 +74,220 @@ int dprtc_close(struct fsl_mc_io *mc_io,
 }
 
 /**
- * dprtc_set_freq_compensation() - Sets a new frequency compensation value.
+ * dprtc_set_irq_enable() - Set overall interrupt state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @en:		Interrupt state - enable = 1, disable = 0
 *
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPRTC object
- * @freq_compensation:	The new frequency compensation value to set.
+ * Allows GPP software to control when interrupts are generated.
+ * Each interrupt can have up to 32 causes. The enable/disable controls the
+ * overall interrupt state: if the interrupt is disabled, no cause will
+ * assert an interrupt.
 *
 * Return: '0' on Success; Error code otherwise.
*/ -int dprtc_set_freq_compensation(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token, - u32 freq_compensation) +int dprtc_set_irq_enable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u8 en) { - struct dprtc_get_freq_compensation *cmd_params; + struct dprtc_cmd_set_irq_enable *cmd_params; struct fsl_mc_command cmd = { 0 }; - cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_FREQ_COMPENSATION, + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_ENABLE, cmd_flags, token); - cmd_params = (struct dprtc_get_freq_compensation *)cmd.params; - cmd_params->freq_compensation = cpu_to_le32(freq_compensation); + cmd_params = (struct dprtc_cmd_set_irq_enable *)cmd.params; + cmd_params->irq_index = irq_index; + cmd_params->en = en; return mc_send_command(mc_io, &cmd); } /** - * dprtc_get_freq_compensation() - Retrieves the frequency compensation value + * dprtc_get_irq_enable() - Get overall interrupt state + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPRTC object + * @irq_index: The interrupt index to configure + * @en: Returned interrupt state - enable = 1, disable = 0 + * + * Return: '0' on Success; Error code otherwise. + */ +int dprtc_get_irq_enable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u8 *en) +{ + struct dprtc_rsp_get_irq_enable *rsp_params; + struct dprtc_cmd_get_irq *cmd_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_ENABLE, + cmd_flags, + token); + cmd_params = (struct dprtc_cmd_get_irq *)cmd.params; + cmd_params->irq_index = irq_index; + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dprtc_rsp_get_irq_enable *)cmd.params; + *en = rsp_params->en; + + return 0; +} + +/** + * dprtc_set_irq_mask() - Set interrupt mask. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPRTC object + * @irq_index: The interrupt index to configure + * @mask: Event mask to trigger interrupt; + * each bit: + * 0 = ignore event + * 1 = consider event for asserting IRQ + * + * Every interrupt can have up to 32 causes and the interrupt model supports + * masking/unmasking each cause independently + * + * Return: '0' on Success; Error code otherwise. + */ +int dprtc_set_irq_mask(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u32 mask) +{ + struct dprtc_cmd_set_irq_mask *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_MASK, + cmd_flags, + token); + cmd_params = (struct dprtc_cmd_set_irq_mask *)cmd.params; + cmd_params->mask = cpu_to_le32(mask); + cmd_params->irq_index = irq_index; + + return mc_send_command(mc_io, &cmd); +} + +/** + * dprtc_get_irq_mask() - Get interrupt mask. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPRTC object + * @irq_index: The interrupt index to configure + * @mask: Returned event mask to trigger interrupt * - * @mc_io: Pointer to MC portal's I/O object - * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' - * @token: Token of DPRTC object - * @freq_compensation: Frequency compensation value + * Every interrupt can have up to 32 causes and the interrupt model supports + * masking/unmasking each cause independently * * Return: '0' on Success; Error code otherwise. 
*/ -int dprtc_get_freq_compensation(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token, - u32 *freq_compensation) +int dprtc_get_irq_mask(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u32 *mask) { - struct dprtc_get_freq_compensation *rsp_params; + struct dprtc_rsp_get_irq_mask *rsp_params; + struct dprtc_cmd_get_irq *cmd_params; struct fsl_mc_command cmd = { 0 }; int err; - cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_FREQ_COMPENSATION, + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_MASK, cmd_flags, token); + cmd_params = (struct dprtc_cmd_get_irq *)cmd.params; + cmd_params->irq_index = irq_index; err = mc_send_command(mc_io, &cmd); if (err) return err; - rsp_params = (struct dprtc_get_freq_compensation *)cmd.params; - *freq_compensation = le32_to_cpu(rsp_params->freq_compensation); + rsp_params = (struct dprtc_rsp_get_irq_mask *)cmd.params; + *mask = le32_to_cpu(rsp_params->mask); return 0; } /** - * dprtc_get_time() - Returns the current RTC time. + * dprtc_get_irq_status() - Get the current status of any pending interrupts. * * @mc_io: Pointer to MC portal's I/O object * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' * @token: Token of DPRTC object - * @time: Current RTC time. + * @irq_index: The interrupt index to configure + * @status: Returned interrupts status - one bit per cause: + * 0 = no interrupt pending + * 1 = interrupt pending * * Return: '0' on Success; Error code otherwise. */ -int dprtc_get_time(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token, - uint64_t *time) +int dprtc_get_irq_status(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u32 *status) { - struct dprtc_time *rsp_params; + struct dprtc_cmd_get_irq_status *cmd_params; + struct dprtc_rsp_get_irq_status *rsp_params; struct fsl_mc_command cmd = { 0 }; int err; - cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_TIME, + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_STATUS, cmd_flags, token); + cmd_params = (struct dprtc_cmd_get_irq_status *)cmd.params; + cmd_params->status = cpu_to_le32(*status); + cmd_params->irq_index = irq_index; err = mc_send_command(mc_io, &cmd); if (err) return err; - rsp_params = (struct dprtc_time *)cmd.params; - *time = le64_to_cpu(rsp_params->time); + rsp_params = (struct dprtc_rsp_get_irq_status *)cmd.params; + *status = le32_to_cpu(rsp_params->status); return 0; } /** - * dprtc_set_time() - Updates current RTC time. + * dprtc_clear_irq_status() - Clear a pending interrupt's status * * @mc_io: Pointer to MC portal's I/O object * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' * @token: Token of DPRTC object - * @time: New RTC time. + * @irq_index: The interrupt index to configure + * @status: Bits to clear (W1C) - one bit per cause: + * 0 = don't change + * 1 = clear status bit * * Return: '0' on Success; Error code otherwise. 
*/
-int dprtc_set_time(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   uint64_t time)
+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u8 irq_index,
+			   u32 status)
 {
-	struct dprtc_time *cmd_params;
+	struct dprtc_cmd_clear_irq_status *cmd_params;
 	struct fsl_mc_command cmd = { 0 };
 
-	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_TIME,
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_CLEAR_IRQ_STATUS,
 					  cmd_flags,
 					  token);
-	cmd_params = (struct dprtc_time *)cmd.params;
-	cmd_params->time = cpu_to_le64(time);
+	cmd_params = (struct dprtc_cmd_clear_irq_status *)cmd.params;
+	cmd_params->irq_index = irq_index;
+	cmd_params->status = cpu_to_le32(status);
 
 	return mc_send_command(mc_io, &cmd);
 }
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
index fe19618d6cdf..be7914c1634d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
@@ -13,6 +13,14 @@
 
 struct fsl_mc_io;
 
+/**
+ * Number of IRQs
+ */
+#define DPRTC_MAX_IRQ_NUM	1
+#define DPRTC_IRQ_INDEX		0
+
+#define DPRTC_EVENT_PPS		0x08000000
+
 int dprtc_open(struct fsl_mc_io *mc_io,
 	       u32 cmd_flags,
 	       int dprtc_id,
@@ -22,24 +30,40 @@ int dprtc_close(struct fsl_mc_io *mc_io,
 		u32 cmd_flags,
 		u16 token);
 
-int dprtc_set_freq_compensation(struct fsl_mc_io *mc_io,
-				u32 cmd_flags,
-				u16 token,
-				u32 freq_compensation);
-
-int dprtc_get_freq_compensation(struct fsl_mc_io *mc_io,
-				u32 cmd_flags,
-				u16 token,
-				u32 *freq_compensation);
-
-int dprtc_get_time(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   uint64_t *time);
-
-int dprtc_set_time(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   uint64_t time);
+int dprtc_set_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 en);
+
+int dprtc_get_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 *en);
+
+int dprtc_set_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 mask);
+
+int dprtc_get_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 *mask);
+
+int dprtc_get_irq_status(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u32 *status);
+
+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u8 irq_index,
+			   u32 status);
 
 #endif /* __FSL_DPRTC_H */
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 8429f5c1d810..ed0d010c7cf2 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -29,3 +29,13 @@ config FSL_ENETC_PTP_CLOCK
 	  packets using the SO_TIMESTAMPING API.
 
 	  If compiled as module (M), the module name is fsl-enetc-ptp.
+
+config FSL_ENETC_HW_TIMESTAMPING
+	bool "ENETC hardware timestamping support"
+	depends on FSL_ENETC || FSL_ENETC_VF
+	help
+	  Enable hardware timestamping of Ethernet packets, using the
+	  SO_TIMESTAMPING API. Because dynamic allocation of the RX BD
+	  ring is not yet supported, and extended RX BDs are too expensive
+	  to use when timestamping is not needed, this option enables
+	  extended RX BDs in order to support hardware timestamping.
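For reference, the timestamping that FSL_ENETC_HW_TIMESTAMPING enables is driven from user space through the standard kernel interfaces wired up later in this patch (enetc_hwtstamp_set()/enetc_hwtstamp_get() behind SIOCSHWTSTAMP/SIOCGHWTSTAMP, plus SO_TIMESTAMPING on the socket). A minimal sketch of the consumer side; the interface name "eno0" and the bare-bones error handling are illustrative, not part of the patch:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

/* Ask the NIC for hardware timestamps, then enable reporting on 'sock'. */
static int enable_hw_tstamps(int sock, const char *ifname)
{
	struct hwtstamp_config cfg = {
		.tx_type   = HWTSTAMP_TX_ON,      /* driver sets ENETC_F_TX_TSTAMP */
		.rx_filter = HWTSTAMP_FILTER_ALL, /* driver sets ENETC_F_RX_TSTAMP */
	};
	struct ifreq ifr;
	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
		    SOF_TIMESTAMPING_RX_HARDWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1); /* e.g. "eno0" */
	ifr.ifr_data = (void *)&cfg;

	if (ioctl(sock, SIOCSHWTSTAMP, &ifr) < 0) /* lands in enetc_hwtstamp_set() */
		return -1;

	return setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}

With this in place, RX timestamps arrive as SCM_TIMESTAMPING control messages on the socket, and TX timestamps are looped back via the socket error queue.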
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 491475d87736..223709443ea4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -13,7 +13,8 @@ #define ENETC_MAX_SKB_FRAGS 13 #define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) -static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb); +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, + int active_offloads); netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -33,7 +34,7 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_BUSY; } - count = enetc_map_tx_buffs(tx_ring, skb); + count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads); if (unlikely(!count)) goto drop_packet_err; @@ -105,7 +106,8 @@ static void enetc_free_tx_skb(struct enetc_bdr *tx_ring, } } -static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, + int active_offloads) { struct enetc_tx_swbd *tx_swbd; struct skb_frag_struct *frag; @@ -137,7 +139,10 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) count++; do_vlan = skb_vlan_tag_present(skb); - do_tstamp = skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP; + do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP); + tx_swbd->do_tstamp = do_tstamp; + tx_swbd->check_wb = tx_swbd->do_tstamp; if (do_vlan || do_tstamp) flags |= ENETC_TXBD_FLAGS_EX; @@ -299,24 +304,70 @@ static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi; } +static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd, + u64 *tstamp) +{ + u32 lo, hi, tstamp_lo; + + lo = enetc_rd(hw, ENETC_SICTR0); + hi = enetc_rd(hw, ENETC_SICTR1); + tstamp_lo = le32_to_cpu(txbd->wb.tstamp); + if (lo <= tstamp_lo) + hi -= 1; + *tstamp = (u64)hi << 32 | tstamp_lo; +} + +static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp) +{ + struct skb_shared_hwtstamps shhwtstamps; + + if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + skb_tstamp_tx(skb, &shhwtstamps); + } +} + static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) { struct net_device *ndev = tx_ring->ndev; int tx_frm_cnt = 0, tx_byte_cnt = 0; struct enetc_tx_swbd *tx_swbd; int i, bds_to_clean; + bool do_tstamp; + u64 tstamp = 0; i = tx_ring->next_to_clean; tx_swbd = &tx_ring->tx_swbd[i]; bds_to_clean = enetc_bd_ready_count(tx_ring, i); + do_tstamp = false; + while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { bool is_eof = !!tx_swbd->skb; + if (unlikely(tx_swbd->check_wb)) { + struct enetc_ndev_priv *priv = netdev_priv(ndev); + union enetc_tx_bd *txbd; + + txbd = ENETC_TXBD(*tx_ring, i); + + if (txbd->flags & ENETC_TXBD_FLAGS_W && + tx_swbd->do_tstamp) { + enetc_get_tx_tstamp(&priv->si->hw, txbd, + &tstamp); + do_tstamp = true; + } + } + if (likely(tx_swbd->dma)) enetc_unmap_tx_buff(tx_ring, tx_swbd); if (is_eof) { + if (unlikely(do_tstamp)) { + enetc_tstamp_tx(tx_swbd->skb, tstamp); + do_tstamp = false; + } napi_consume_skb(tx_swbd->skb, napi_budget); tx_swbd->skb = NULL; } @@ -425,10 +476,38 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt) return j; } 
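/* Reviewer sketch, not part of the patch: enetc_get_tx_tstamp() above and
 * enetc_get_rx_tstamp() below share the same trick for widening the 32-bit
 * timestamp captured in the BD writeback to 64 bits: read the free-running
 * counter (ENETC_SICTR0/1) now, and if its low word is already at or below
 * the captured low word, the high word must have incremented since the
 * capture, so step it back one epoch. Standalone user-space illustration:
 */
#include <assert.h>
#include <stdint.h>

static uint64_t rebuild_tstamp(uint32_t cur_lo, uint32_t cur_hi,
			       uint32_t captured_lo)
{
	if (cur_lo <= captured_lo)	/* low word wrapped after capture */
		cur_hi -= 1;
	return (uint64_t)cur_hi << 32 | captured_lo;
}

int main(void)
{
	/* Captured at hi=5 near the top of the low word; the counter has
	 * wrapped to hi=6 by the time the driver reads SICTR0/1.
	 */
	assert(rebuild_tstamp(0x10, 6, 0xFFFFFFF0) ==
	       (((uint64_t)5 << 32) | 0xFFFFFFF0));
	return 0;
}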
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING +static void enetc_get_rx_tstamp(struct net_device *ndev, + union enetc_rx_bd *rxbd, + struct sk_buff *skb) +{ + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + u32 lo, hi, tstamp_lo; + u64 tstamp; + + if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) { + lo = enetc_rd(hw, ENETC_SICTR0); + hi = enetc_rd(hw, ENETC_SICTR1); + tstamp_lo = le32_to_cpu(rxbd->r.tstamp); + if (lo <= tstamp_lo) + hi -= 1; + + tstamp = (u64)hi << 32 | tstamp_lo; + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + shhwtstamps->hwtstamp = ns_to_ktime(tstamp); + } +} +#endif + static void enetc_get_offloads(struct enetc_bdr *rx_ring, union enetc_rx_bd *rxbd, struct sk_buff *skb) { - /* TODO: add tstamp, hashing */ +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev); +#endif + /* TODO: hashing */ if (rx_ring->ndev->features & NETIF_F_RXCSUM) { u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum); @@ -442,6 +521,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxbd->r.vlan_opt)); +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + if (priv->active_offloads & ENETC_F_RX_TSTAMP) + enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb); +#endif } static void enetc_process_skb(struct enetc_bdr *rx_ring, @@ -1074,6 +1157,9 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) enetc_rxbdr_wr(hw, idx, ENETC_RBICIR0, ENETC_RBICIR0_ICEN | 0x1); rbmr = ENETC_RBMR_EN; +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + rbmr |= ENETC_RBMR_BDS; +#endif if (rx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_RX) rbmr |= ENETC_RBMR_VTE; @@ -1341,6 +1427,62 @@ int enetc_close(struct net_device *ndev) return 0; } +int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, + void *type_data) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct tc_mqprio_qopt *mqprio = type_data; + struct enetc_bdr *tx_ring; + u8 num_tc; + int i; + + if (type != TC_SETUP_QDISC_MQPRIO) + return -EOPNOTSUPP; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + num_tc = mqprio->num_tc; + + if (!num_tc) { + netdev_reset_tc(ndev); + netif_set_real_num_tx_queues(ndev, priv->num_tx_rings); + + /* Reset all ring priorities to 0 */ + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0); + } + + return 0; + } + + /* Check if we have enough BD rings available to accommodate all TCs */ + if (num_tc > priv->num_tx_rings) { + netdev_err(ndev, "Max %d traffic classes supported\n", + priv->num_tx_rings); + return -EINVAL; + } + + /* For the moment, we use only one BD ring per TC. + * + * Configure num_tc BD rings with increasing priorities. 
+ */ + for (i = 0; i < num_tc; i++) { + tx_ring = priv->tx_ring[i]; + enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i); + } + + /* Reset the number of netdev queues based on the TC count */ + netif_set_real_num_tx_queues(ndev, num_tc); + + netdev_set_num_tc(ndev, num_tc); + + /* Each TC is associated with one netdev queue */ + for (i = 0; i < num_tc; i++) + netdev_set_tc_queue(ndev, i, 1, i); + + return 0; +} + struct net_device_stats *enetc_get_stats(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -1396,6 +1538,70 @@ int enetc_set_features(struct net_device *ndev, return 0; } +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING +static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct hwtstamp_config config; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + priv->active_offloads &= ~ENETC_F_TX_TSTAMP; + break; + case HWTSTAMP_TX_ON: + priv->active_offloads |= ENETC_F_TX_TSTAMP; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + priv->active_offloads &= ~ENETC_F_RX_TSTAMP; + break; + default: + priv->active_offloads |= ENETC_F_RX_TSTAMP; + config.rx_filter = HWTSTAMP_FILTER_ALL; + } + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct hwtstamp_config config; + + config.flags = 0; + + if (priv->active_offloads & ENETC_F_TX_TSTAMP) + config.tx_type = HWTSTAMP_TX_ON; + else + config.tx_type = HWTSTAMP_TX_OFF; + + config.rx_filter = (priv->active_offloads & ENETC_F_RX_TSTAMP) ? + HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
+ -EFAULT : 0; +} +#endif + +int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) +{ +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + if (cmd == SIOCSHWTSTAMP) + return enetc_hwtstamp_set(ndev, rq); + if (cmd == SIOCGHWTSTAMP) + return enetc_hwtstamp_get(ndev, rq); +#endif + return -EINVAL; +} + int enetc_alloc_msix(struct enetc_ndev_priv *priv) { struct pci_dev *pdev = priv->si->pdev; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index b274135c5103..541b4e2073fe 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -21,7 +21,9 @@ struct enetc_tx_swbd { struct sk_buff *skb; dma_addr_t dma; u16 len; - u16 is_dma_page; + u8 is_dma_page:1; + u8 check_wb:1; + u8 do_tstamp:1; }; #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE @@ -167,6 +169,12 @@ struct enetc_cls_rule { #define ENETC_MAX_BDR_INT 2 /* fixed to max # of available cpus */ +/* TODO: more hardware offloads */ +enum enetc_active_offloads { + ENETC_F_RX_TSTAMP = BIT(0), + ENETC_F_TX_TSTAMP = BIT(1), +}; + struct enetc_ndev_priv { struct net_device *ndev; struct device *dev; /* dma-mapping device */ @@ -178,6 +186,7 @@ struct enetc_ndev_priv { u16 rx_bd_count, tx_bd_count; u16 msg_enable; + int active_offloads; struct enetc_bdr *tx_ring[16]; struct enetc_bdr *rx_ring[16]; @@ -200,6 +209,9 @@ struct enetc_msg_cmd_set_primary_mac { #define ENETC_CBDR_TIMEOUT 1000 /* usecs */ +/* PTP driver exports */ +extern int enetc_phc_index; + /* SI common */ int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv); void enetc_pci_remove(struct pci_dev *pdev); @@ -216,6 +228,10 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev); struct net_device_stats *enetc_get_stats(struct net_device *ndev); int enetc_set_features(struct net_device *ndev, netdev_features_t features); +int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd); +int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, + void *type_data); + /* ethtool */ void enetc_set_ethtool_ops(struct net_device *ndev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index b9519b6ad727..fcb52efec075 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -555,6 +555,35 @@ static void enetc_get_ringparam(struct net_device *ndev, } } +static int enetc_get_ts_info(struct net_device *ndev, + struct ethtool_ts_info *info) +{ + int *phc_idx; + + phc_idx = symbol_get(enetc_phc_index); + if (phc_idx) { + info->phc_index = *phc_idx; + symbol_put(enetc_phc_index); + } else { + info->phc_index = -1; + } + +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); +#else + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; +#endif + return 0; +} + static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_regs_len = enetc_get_reglen, .get_regs = enetc_get_regs, @@ -571,6 +600,7 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_link = ethtool_op_get_link, + .get_ts_info = 
enetc_get_ts_info, }; static const struct ethtool_ops enetc_vf_ethtool_ops = { @@ -586,6 +616,7 @@ static const struct ethtool_ops enetc_vf_ethtool_ops = { .set_rxfh = enetc_set_rxfh, .get_ringparam = enetc_get_ringparam, .get_link = ethtool_op_get_link, + .get_ts_info = enetc_get_ts_info, }; void enetc_set_ethtool_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index df8eb8882d92..88276299f447 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -127,7 +127,7 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_TBSR_BUSY BIT(0) #define ENETC_TBMR_VIH BIT(9) #define ENETC_TBMR_PRIO_MASK GENMASK(2, 0) -#define ENETC_TBMR_PRIO_SET(val) val +#define ENETC_TBMR_SET_PRIO(val) ((val) & ENETC_TBMR_PRIO_MASK) #define ENETC_TBMR_EN BIT(31) #define ENETC_TBSR 0x4 #define ENETC_TBBAR0 0x10 @@ -361,6 +361,12 @@ union enetc_tx_bd { u8 e_flags; u8 flags; } ext; /* Tx BD extension */ + struct { + __le32 tstamp; + u8 reserved[10]; + u8 status; + u8 flags; + } wb; /* writeback descriptor */ }; #define ENETC_TXBD_FLAGS_L4CS BIT(0) @@ -399,6 +405,9 @@ union enetc_rx_bd { struct { __le64 addr; u8 reserved[8]; +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + u8 reserved1[16]; +#endif } w; struct { __le16 inet_csum; @@ -413,6 +422,10 @@ union enetc_rx_bd { }; __le32 lstatus; }; +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + __le32 tstamp; + u8 reserved[12]; +#endif } r; }; @@ -531,3 +544,13 @@ static inline void enetc_enable_txvlan(struct enetc_hw *hw, int si_idx, val = (val & ~ENETC_TBMR_VIH) | (en ? ENETC_TBMR_VIH : 0); enetc_txbdr_wr(hw, si_idx, ENETC_TBMR, val); } + +static inline void enetc_set_bdr_prio(struct enetc_hw *hw, int bdr_idx, + int prio) +{ + u32 val = enetc_txbdr_rd(hw, bdr_idx, ENETC_TBMR); + + val &= ~ENETC_TBMR_PRIO_MASK; + val |= ENETC_TBMR_SET_PRIO(prio); + enetc_txbdr_wr(hw, bdr_idx, ENETC_TBMR, val); +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 78287c517095..258b3cb38a6f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -702,6 +702,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_set_vf_vlan = enetc_pf_set_vf_vlan, .ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk, .ndo_set_features = enetc_pf_set_features, + .ndo_do_ioctl = enetc_ioctl, + .ndo_setup_tc = enetc_setup_tc, }; static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c index 8c1497e7d9c5..2fd2586e42bf 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -7,6 +7,9 @@ #include "enetc.h" +int enetc_phc_index = -1; +EXPORT_SYMBOL(enetc_phc_index); + static struct ptp_clock_info enetc_ptp_caps = { .owner = THIS_MODULE, .name = "ENETC PTP clock", @@ -96,6 +99,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev, if (err) goto err_no_clock; + enetc_phc_index = ptp_qoriq->phc_index; pci_set_drvdata(pdev, ptp_qoriq); return 0; @@ -119,6 +123,7 @@ static void enetc_ptp_remove(struct pci_dev *pdev) { struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev); + enetc_phc_index = -1; ptp_qoriq_free(ptp_qoriq); kfree(ptp_qoriq); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 
72c3ea887bcf..ebd21bf4cfa1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -111,6 +111,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_get_stats = enetc_get_stats, .ndo_set_mac_address = enetc_vf_set_mac_addr, .ndo_set_features = enetc_vf_set_features, + .ndo_do_ioctl = enetc_ioctl, + .ndo_setup_tc = enetc_setup_tc, }; static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 38f10f7dcbc3..9d459ccf251d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1689,10 +1689,10 @@ static void fec_get_mac(struct net_device *ndev) */ if (!is_valid_ether_addr(iap)) { /* Report it and use a random ethernet address instead */ - netdev_err(ndev, "Invalid MAC address: %pM\n", iap); + dev_err(&fep->pdev->dev, "Invalid MAC address: %pM\n", iap); eth_hw_addr_random(ndev); - netdev_info(ndev, "Using random MAC address: %pM\n", - ndev->dev_addr); + dev_info(&fep->pdev->dev, "Using random MAC address: %pM\n", + ndev->dev_addr); return; } @@ -2446,30 +2446,31 @@ static int fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) { struct fec_enet_private *fep = netdev_priv(ndev); + struct device *dev = &fep->pdev->dev; unsigned int cycle; if (!(fep->quirks & FEC_QUIRK_HAS_COALESCE)) return -EOPNOTSUPP; if (ec->rx_max_coalesced_frames > 255) { - pr_err("Rx coalesced frames exceed hardware limitation\n"); + dev_err(dev, "Rx coalesced frames exceed hardware limitation\n"); return -EINVAL; } if (ec->tx_max_coalesced_frames > 255) { - pr_err("Tx coalesced frame exceed hardware limitation\n"); + dev_err(dev, "Tx coalesced frame exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } @@ -3473,7 +3474,6 @@ fec_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to enable phy regulator: %d\n", ret); - clk_disable_unprepare(fep->clk_ipg); goto failed_regulator; } } else { diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 7e892b1cbd3d..19e2365be7d8 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -617,7 +617,7 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx) fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev); if (IS_ERR(fep->ptp_clock)) { fep->ptp_clock = NULL; - pr_err("ptp_clock_register failed\n"); + dev_err(&pdev->dev, "ptp_clock_register failed\n"); } schedule_delayed_work(&fep->time_keep, HZ); diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.c b/drivers/net/ethernet/freescale/fman/fman_keygen.c index f54da3c684d0..e1bdfed16134 100644 --- a/drivers/net/ethernet/freescale/fman/fman_keygen.c +++ b/drivers/net/ethernet/freescale/fman/fman_keygen.c @@ -144,7 +144,8 @@ /* Hash Key extraction fields: */ #define DEFAULT_HASH_KEY_EXTRACT_FIELDS \ (KG_SCH_KN_IPSRC1 | KG_SCH_KN_IPDST1 | \ - KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST) + 
KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST | \
+	 KG_SCH_KN_IPSEC_SPI)
 
 /* Default values to be used as hash key in case IPv4 or L4 (TCP, UDP)
  * don't exist in the frame
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig
new file mode 100644
index 000000000000..b8f04d052fda
--- /dev/null
+++ b/drivers/net/ethernet/google/Kconfig
@@ -0,0 +1,27 @@
+#
+# Google network device configuration
+#
+
+config NET_VENDOR_GOOGLE
+	bool "Google Devices"
+	default y
+	help
+	  If you have a network (Ethernet) device belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Google devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_GOOGLE
+
+config GVE
+	tristate "Google Virtual NIC (gVNIC) support"
+	depends on PCI_MSI
+	help
+	  This driver supports the Google Virtual NIC (gVNIC).
+
+	  To compile this driver as a module, choose M here.
+	  The module will be called gve.
+
+endif #NET_VENDOR_GOOGLE
diff --git a/drivers/net/ethernet/google/Makefile b/drivers/net/ethernet/google/Makefile
new file mode 100644
index 000000000000..402cc3ba1639
--- /dev/null
+++ b/drivers/net/ethernet/google/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Google network device drivers.
+#
+
+obj-$(CONFIG_GVE) += gve/
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
new file mode 100644
index 000000000000..3354ce40eb97
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -0,0 +1,4 @@
+# Makefile for the Google virtual Ethernet (gve) driver
+
+obj-$(CONFIG_GVE) += gve.o
+gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
new file mode 100644
index 000000000000..92372dc43be8
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -0,0 +1,459 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_H_
+#define _GVE_H_
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/u64_stats_sync.h>
+#include "gve_desc.h"
+
+#ifndef PCI_VENDOR_ID_GOOGLE
+#define PCI_VENDOR_ID_GOOGLE	0x1ae0
+#endif
+
+#define PCI_DEV_ID_GVNIC	0x0042
+
+#define GVE_REGISTER_BAR	0
+#define GVE_DOORBELL_BAR	2
+
+/* Driver can alloc up to 2 segments for the header and 2 for the payload.
*/ +#define GVE_TX_MAX_IOVEC 4 +/* 1 for management, 1 for rx, 1 for tx */ +#define GVE_MIN_MSIX 3 + +/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ +struct gve_rx_desc_queue { + struct gve_rx_desc *desc_ring; /* the descriptor ring */ + dma_addr_t bus; /* the bus for the desc_ring */ + u32 cnt; /* free-running total number of completed packets */ + u32 fill_cnt; /* free-running total number of descriptors posted */ + u32 mask; /* masks the cnt to the size of the ring */ + u8 seqno; /* the next expected seqno for this desc*/ +}; + +/* The page info for a single slot in the RX data queue */ +struct gve_rx_slot_page_info { + struct page *page; + void *page_address; + u32 page_offset; /* offset to write to in page */ +}; + +/* A list of pages registered with the device during setup and used by a queue + * as buffers + */ +struct gve_queue_page_list { + u32 id; /* unique id */ + u32 num_entries; + struct page **pages; /* list of num_entries pages */ + dma_addr_t *page_buses; /* the dma addrs of the pages */ +}; + +/* Each slot in the data ring has a 1:1 mapping to a slot in the desc ring */ +struct gve_rx_data_queue { + struct gve_rx_data_slot *data_ring; /* read by NIC */ + dma_addr_t data_bus; /* dma mapping of the slots */ + struct gve_rx_slot_page_info *page_info; /* page info of the buffers */ + struct gve_queue_page_list *qpl; /* qpl assigned to this queue */ + u32 mask; /* masks the cnt to the size of the ring */ + u32 cnt; /* free-running total number of completed packets */ +}; + +struct gve_priv; + +/* An RX ring that contains a power-of-two sized desc and data ring. */ +struct gve_rx_ring { + struct gve_priv *gve; + struct gve_rx_desc_queue desc; + struct gve_rx_data_queue data; + u64 rbytes; /* free-running bytes received */ + u64 rpackets; /* free-running packets received */ + u32 q_num; /* queue index */ + u32 ntfy_id; /* notification block index */ + struct gve_queue_resources *q_resources; /* head and tail pointer idx */ + dma_addr_t q_resources_bus; /* dma address for the queue resources */ + struct u64_stats_sync statss; /* sync stats for 32bit archs */ +}; + +/* A TX desc ring entry */ +union gve_tx_desc { + struct gve_tx_pkt_desc pkt; /* first desc for a packet */ + struct gve_tx_seg_desc seg; /* subsequent descs for a packet */ +}; + +/* Tracks the memory in the fifo occupied by a segment of a packet */ +struct gve_tx_iovec { + u32 iov_offset; /* offset into this segment */ + u32 iov_len; /* length */ + u32 iov_padding; /* padding associated with this segment */ +}; + +/* Tracks the memory in the fifo occupied by the skb. 
Mapped 1:1 to a desc + * ring entry but only used for a pkt_desc not a seg_desc + */ +struct gve_tx_buffer_state { + struct sk_buff *skb; /* skb for this pkt */ + struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */ +}; + +/* A TX buffer - each queue has one */ +struct gve_tx_fifo { + void *base; /* address of base of FIFO */ + u32 size; /* total size */ + atomic_t available; /* how much space is still available */ + u32 head; /* offset to write at */ + struct gve_queue_page_list *qpl; /* QPL mapped into this FIFO */ +}; + +/* A TX ring that contains a power-of-two sized desc ring and a FIFO buffer */ +struct gve_tx_ring { + /* Cacheline 0 -- Accessed & dirtied during transmit */ + struct gve_tx_fifo tx_fifo; + u32 req; /* driver tracked head pointer */ + u32 done; /* driver tracked tail pointer */ + + /* Cacheline 1 -- Accessed & dirtied during gve_clean_tx_done */ + __be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */ + u64 pkt_done; /* free-running - total packets completed */ + u64 bytes_done; /* free-running - total bytes completed */ + + /* Cacheline 2 -- Read-mostly fields */ + union gve_tx_desc *desc ____cacheline_aligned; + struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */ + struct netdev_queue *netdev_txq; + struct gve_queue_resources *q_resources; /* head and tail pointer idx */ + u32 mask; /* masks req and done down to queue size */ + + /* Slow-path fields */ + u32 q_num ____cacheline_aligned; /* queue idx */ + u32 stop_queue; /* count of queue stops */ + u32 wake_queue; /* count of queue wakes */ + u32 ntfy_id; /* notification block index */ + dma_addr_t bus; /* dma address of the descr ring */ + dma_addr_t q_resources_bus; /* dma address of the queue resources */ + struct u64_stats_sync statss; /* sync stats for 32bit archs */ +} ____cacheline_aligned; + +/* Wraps the info for one irq including the napi struct and the queues + * associated with that irq. 
+ */ +struct gve_notify_block { + __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */ + char name[IFNAMSIZ + 16]; /* name registered with the kernel */ + struct napi_struct napi; /* kernel napi struct for this block */ + struct gve_priv *priv; + struct gve_tx_ring *tx; /* tx rings on this block */ + struct gve_rx_ring *rx; /* rx rings on this block */ +} ____cacheline_aligned; + +/* Tracks allowed and current queue settings */ +struct gve_queue_config { + u16 max_queues; + u16 num_queues; /* current */ +}; + +/* Tracks the available and used qpl IDs */ +struct gve_qpl_config { + u32 qpl_map_size; /* map memory size */ + unsigned long *qpl_id_map; /* bitmap of used qpl ids */ +}; + +struct gve_priv { + struct net_device *dev; + struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ + struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ + struct gve_queue_page_list *qpls; /* array of num qpls */ + struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ + dma_addr_t ntfy_block_bus; + struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */ + char mgmt_msix_name[IFNAMSIZ + 16]; + u32 mgmt_msix_idx; + __be32 *counter_array; /* array of num_event_counters */ + dma_addr_t counter_array_bus; + + u16 num_event_counters; + u16 tx_desc_cnt; /* num desc per ring */ + u16 rx_desc_cnt; /* num desc per ring */ + u16 tx_pages_per_qpl; /* tx buffer length */ + u16 rx_pages_per_qpl; /* rx buffer length */ + u64 max_registered_pages; + u64 num_registered_pages; /* num pages registered with NIC */ + u32 rx_copybreak; /* copy packets smaller than this */ + u16 default_num_queues; /* default num queues to set up */ + + struct gve_queue_config tx_cfg; + struct gve_queue_config rx_cfg; + struct gve_qpl_config qpl_cfg; /* map used QPL ids */ + u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ + + struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ + __be32 __iomem *db_bar2; /* "array" of doorbells */ + u32 msg_enable; /* level for netif* netdev print macros */ + struct pci_dev *pdev; + + /* metrics */ + u32 tx_timeo_cnt; + + /* Admin queue - see gve_adminq.h*/ + union gve_adminq_command *adminq; + dma_addr_t adminq_bus_addr; + u32 adminq_mask; /* masks prod_cnt to adminq size */ + u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ + + struct workqueue_struct *gve_wq; + struct work_struct service_task; + unsigned long service_task_flags; + unsigned long state_flags; +}; + +enum gve_service_task_flags { + GVE_PRIV_FLAGS_DO_RESET = BIT(1), + GVE_PRIV_FLAGS_RESET_IN_PROGRESS = BIT(2), + GVE_PRIV_FLAGS_PROBE_IN_PROGRESS = BIT(3), +}; + +enum gve_state_flags { + GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = BIT(1), + GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = BIT(2), + GVE_PRIV_FLAGS_DEVICE_RINGS_OK = BIT(3), + GVE_PRIV_FLAGS_NAPI_ENABLED = BIT(4), +}; + +static inline bool gve_get_do_reset(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags); +} + +static inline void gve_set_do_reset(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags); +} + +static inline void gve_clear_do_reset(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags); +} + +static inline bool gve_get_reset_in_progress(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, + &priv->service_task_flags); +} + +static inline void gve_set_reset_in_progress(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags); +} + 
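/* Reviewer sketch, not part of the patch: the Linux set_bit()/clear_bit()/
 * test_bit() helpers take a bit *number*, not a mask, while the two flag
 * enums above are defined with BIT(). GVE_PRIV_FLAGS_DO_RESET == BIT(1) == 2,
 * so gve_set_do_reset() really operates on bit number 2 (mask 0x4). This is
 * still correct, because every accessor passes the same enum value, but the
 * bits consumed are sparser than the definitions suggest. Standalone
 * user-space illustration (the local BIT() mirrors the kernel macro):
 */
#include <assert.h>

#define BIT(n) (1UL << (n))

int main(void)
{
	unsigned long flags = 0;
	unsigned long do_reset = BIT(1);   /* == 2, used as a bit number */

	flags |= 1UL << do_reset;          /* what set_bit(do_reset, &flags) does */
	assert(flags == 0x4UL);            /* bit 2 is set, not bit 1 */
	assert((flags >> do_reset) & 1UL); /* test_bit(do_reset, &flags) agrees */
	return 0;
}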
+static inline void gve_clear_reset_in_progress(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags); +} + +static inline bool gve_get_probe_in_progress(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, + &priv->service_task_flags); +} + +static inline void gve_set_probe_in_progress(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags); +} + +static inline void gve_clear_probe_in_progress(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags); +} + +static inline bool gve_get_admin_queue_ok(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags); +} + +static inline void gve_set_admin_queue_ok(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags); +} + +static inline void gve_clear_admin_queue_ok(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags); +} + +static inline bool gve_get_device_resources_ok(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags); +} + +static inline void gve_set_device_resources_ok(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags); +} + +static inline void gve_clear_device_resources_ok(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags); +} + +static inline bool gve_get_device_rings_ok(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags); +} + +static inline void gve_set_device_rings_ok(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags); +} + +static inline void gve_clear_device_rings_ok(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags); +} + +static inline bool gve_get_napi_enabled(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags); +} + +static inline void gve_set_napi_enabled(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags); +} + +static inline void gve_clear_napi_enabled(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags); +} + +/* Returns the address of the ntfy_blocks irq doorbell + */ +static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv, + struct gve_notify_block *block) +{ + return &priv->db_bar2[be32_to_cpu(block->irq_db_index)]; +} + +/* Returns the index into ntfy_blocks of the given tx ring's block + */ +static inline u32 gve_tx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) +{ + return queue_idx; +} + +/* Returns the index into ntfy_blocks of the given rx ring's block + */ +static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) +{ + return (priv->num_ntfy_blks / 2) + queue_idx; +} + +/* Returns the number of tx queue page lists + */ +static inline u32 gve_num_tx_qpls(struct gve_priv *priv) +{ + return priv->tx_cfg.num_queues; +} + +/* Returns the number of rx queue page lists + */ +static inline u32 gve_num_rx_qpls(struct gve_priv *priv) +{ + return priv->rx_cfg.num_queues; +} + +/* Returns a pointer to the next available tx qpl in the list of qpls + */ +static inline +struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv) +{ + int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map, + priv->qpl_cfg.qpl_map_size); + + /* we are out of tx qpls */ + if (id >= gve_num_tx_qpls(priv)) 
+ return NULL; + + set_bit(id, priv->qpl_cfg.qpl_id_map); + return &priv->qpls[id]; +} + +/* Returns a pointer to the next available rx qpl in the list of qpls + */ +static inline +struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv) +{ + int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map, + priv->qpl_cfg.qpl_map_size, + gve_num_tx_qpls(priv)); + + /* we are out of rx qpls */ + if (id == priv->qpl_cfg.qpl_map_size) + return NULL; + + set_bit(id, priv->qpl_cfg.qpl_id_map); + return &priv->qpls[id]; +} + +/* Unassigns the qpl with the given id + */ +static inline void gve_unassign_qpl(struct gve_priv *priv, int id) +{ + clear_bit(id, priv->qpl_cfg.qpl_id_map); +} + +/* Returns the correct dma direction for tx and rx qpls + */ +static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, + int id) +{ + if (id < gve_num_tx_qpls(priv)) + return DMA_TO_DEVICE; + else + return DMA_FROM_DEVICE; +} + +/* Returns true if the max mtu allows page recycling */ +static inline bool gve_can_recycle_pages(struct net_device *dev) +{ + /* We can't recycle the pages if we can't fit a packet into half a + * page. + */ + return dev->max_mtu <= PAGE_SIZE / 2; +} + +/* buffers */ +int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma, + enum dma_data_direction); +void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, + enum dma_data_direction); +/* tx handling */ +netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); +bool gve_tx_poll(struct gve_notify_block *block, int budget); +int gve_tx_alloc_rings(struct gve_priv *priv); +void gve_tx_free_rings(struct gve_priv *priv); +__be32 gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx); +/* rx handling */ +void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); +bool gve_rx_poll(struct gve_notify_block *block, int budget); +int gve_rx_alloc_rings(struct gve_priv *priv); +void gve_rx_free_rings(struct gve_priv *priv); +bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + netdev_features_t feat); +/* Reset */ +void gve_schedule_reset(struct gve_priv *priv); +int gve_reset(struct gve_priv *priv, bool attempt_teardown); +int gve_adjust_queues(struct gve_priv *priv, + struct gve_queue_config new_rx_config, + struct gve_queue_config new_tx_config); +/* exported by ethtool.c */ +extern const struct ethtool_ops gve_ethtool_ops; +/* needed by ethtool */ +extern const char gve_version_str[]; +#endif /* _GVE_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c new file mode 100644 index 000000000000..c3ba7baf0107 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. 
+ */ + +#include <linux/etherdevice.h> +#include <linux/pci.h> +#include "gve.h" +#include "gve_adminq.h" +#include "gve_register.h" + +#define GVE_MAX_ADMINQ_RELEASE_CHECK 500 +#define GVE_ADMINQ_SLEEP_LEN 20 +#define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 100 + +int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) +{ + priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE, + &priv->adminq_bus_addr, GFP_KERNEL); + if (unlikely(!priv->adminq)) + return -ENOMEM; + + priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1; + priv->adminq_prod_cnt = 0; + + /* Setup Admin queue with the device */ + iowrite32be(priv->adminq_bus_addr / PAGE_SIZE, + &priv->reg_bar0->adminq_pfn); + + gve_set_admin_queue_ok(priv); + return 0; +} + +void gve_adminq_release(struct gve_priv *priv) +{ + int i = 0; + + /* Tell the device the adminq is leaving */ + iowrite32be(0x0, &priv->reg_bar0->adminq_pfn); + while (ioread32be(&priv->reg_bar0->adminq_pfn)) { + /* If this is reached the device is unrecoverable and still + * holding memory. Continue looping to avoid memory corruption, + * but WARN so it is visible what is going on. + */ + if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) + WARN(1, "Unrecoverable platform error!"); + i++; + msleep(GVE_ADMINQ_SLEEP_LEN); + } + gve_clear_device_rings_ok(priv); + gve_clear_device_resources_ok(priv); + gve_clear_admin_queue_ok(priv); +} + +void gve_adminq_free(struct device *dev, struct gve_priv *priv) +{ + if (!gve_get_admin_queue_ok(priv)) + return; + gve_adminq_release(priv); + dma_free_coherent(dev, PAGE_SIZE, priv->adminq, priv->adminq_bus_addr); + gve_clear_admin_queue_ok(priv); +} + +static void gve_adminq_kick_cmd(struct gve_priv *priv, u32 prod_cnt) +{ + iowrite32be(prod_cnt, &priv->reg_bar0->adminq_doorbell); +} + +static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt) +{ + int i; + + for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) { + if (ioread32be(&priv->reg_bar0->adminq_event_counter) + == prod_cnt) + return true; + msleep(GVE_ADMINQ_SLEEP_LEN); + } + + return false; +} + +static int gve_adminq_parse_err(struct device *dev, u32 status) +{ + if (status != GVE_ADMINQ_COMMAND_PASSED && + status != GVE_ADMINQ_COMMAND_UNSET) + dev_err(dev, "AQ command failed with status %d\n", status); + + switch (status) { + case GVE_ADMINQ_COMMAND_PASSED: + return 0; + case GVE_ADMINQ_COMMAND_UNSET: + dev_err(dev, "parse_aq_err: err and status both unset, this should not be possible.\n"); + return -EINVAL; + case GVE_ADMINQ_COMMAND_ERROR_ABORTED: + case GVE_ADMINQ_COMMAND_ERROR_CANCELLED: + case GVE_ADMINQ_COMMAND_ERROR_DATALOSS: + case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION: + case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE: + return -EAGAIN; + case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS: + case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR: + case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT: + case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND: + case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE: + case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR: + return -EINVAL; + case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED: + return -ETIME; + case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED: + case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED: + return -EACCES; + case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED: + return -ENOMEM; + case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED: + return -ENOTSUPP; + default: + dev_err(dev, "parse_aq_err: unknown status code %d\n", status); + return -EINVAL; + } +} + +/* This function is not threadsafe - the caller is responsible for any + * necessary locks. 
+ */ +int gve_adminq_execute_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd_orig) +{ + union gve_adminq_command *cmd; + u32 status = 0; + u32 prod_cnt; + + cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask]; + priv->adminq_prod_cnt++; + prod_cnt = priv->adminq_prod_cnt; + + memcpy(cmd, cmd_orig, sizeof(*cmd_orig)); + + gve_adminq_kick_cmd(priv, prod_cnt); + if (!gve_adminq_wait_for_cmd(priv, prod_cnt)) { + dev_err(&priv->pdev->dev, "AQ command timed out, need to reset AQ\n"); + return -ENOTRECOVERABLE; + } + + memcpy(cmd_orig, cmd, sizeof(*cmd)); + status = be32_to_cpu(READ_ONCE(cmd->status)); + return gve_adminq_parse_err(&priv->pdev->dev, status); +} + +/* The device specifies that the management vector can either be the first irq + * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to + * the ntfy blks. It if is 0 then the management vector is last, if it is 1 then + * the management vector is first. + * + * gve arranges the msix vectors so that the management vector is last. + */ +#define GVE_NTFY_BLK_BASE_MSIX_IDX 0 +int gve_adminq_configure_device_resources(struct gve_priv *priv, + dma_addr_t counter_array_bus_addr, + u32 num_counters, + dma_addr_t db_array_bus_addr, + u32 num_ntfy_blks) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES); + cmd.configure_device_resources = + (struct gve_adminq_configure_device_resources) { + .counter_array = cpu_to_be64(counter_array_bus_addr), + .num_counters = cpu_to_be32(num_counters), + .irq_db_addr = cpu_to_be64(db_array_bus_addr), + .num_irq_dbs = cpu_to_be32(num_ntfy_blks), + .irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])), + .ntfy_blk_msix_base_idx = + cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_deconfigure_device_resources(struct gve_priv *priv) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES); + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) +{ + struct gve_tx_ring *tx = &priv->tx[queue_index]; + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE); + cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) { + .queue_id = cpu_to_be32(queue_index), + .reserved = 0, + .queue_resources_addr = cpu_to_be64(tx->q_resources_bus), + .tx_ring_addr = cpu_to_be64(tx->bus), + .queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id), + .ntfy_id = cpu_to_be32(tx->ntfy_id), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + struct gve_rx_ring *rx = &priv->rx[queue_index]; + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); + cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + .index = cpu_to_be32(queue_index), + .reserved = 0, + .ntfy_id = cpu_to_be32(rx->ntfy_id), + .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), + .rx_desc_ring_addr = cpu_to_be64(rx->desc.bus), + .rx_data_ring_addr = cpu_to_be64(rx->data.data_bus), + .queue_page_list_id = cpu_to_be32(rx->data.qpl->id), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) +{ + union 
gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE); + cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); + cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_describe_device(struct gve_priv *priv) +{ + struct gve_device_descriptor *descriptor; + union gve_adminq_command cmd; + dma_addr_t descriptor_bus; + int err = 0; + u8 *mac; + u16 mtu; + + memset(&cmd, 0, sizeof(cmd)); + descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE, + &descriptor_bus, GFP_KERNEL); + if (!descriptor) + return -ENOMEM; + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE); + cmd.describe_device.device_descriptor_addr = + cpu_to_be64(descriptor_bus); + cmd.describe_device.device_descriptor_version = + cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION); + cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE); + + err = gve_adminq_execute_cmd(priv, &cmd); + if (err) + goto free_device_descriptor; + + priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); + if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) { + netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n", + priv->tx_desc_cnt); + err = -EINVAL; + goto free_device_descriptor; + } + priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); + if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0]) + < PAGE_SIZE || + priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0]) + < PAGE_SIZE) { + netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n", + priv->rx_desc_cnt); + err = -EINVAL; + goto free_device_descriptor; + } + priv->max_registered_pages = + be64_to_cpu(descriptor->max_registered_pages); + mtu = be16_to_cpu(descriptor->mtu); + if (mtu < ETH_MIN_MTU) { + netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n", + mtu); + err = -EINVAL; + goto free_device_descriptor; + } + priv->dev->max_mtu = mtu; + priv->num_event_counters = be16_to_cpu(descriptor->counters); + ether_addr_copy(priv->dev->dev_addr, descriptor->mac); + mac = descriptor->mac; + netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac); + priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); + priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl); + if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) { + netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", + priv->rx_pages_per_qpl); + priv->rx_desc_cnt = priv->rx_pages_per_qpl; + } + priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); + +free_device_descriptor: + dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor, + descriptor_bus); + return err; +} + +int gve_adminq_register_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl) +{ + struct device *hdev = &priv->pdev->dev; + u32 num_entries = qpl->num_entries; + u32 size = num_entries * sizeof(qpl->page_buses[0]); + union gve_adminq_command cmd; + dma_addr_t page_list_bus; + __be64 *page_list; + int err; + int i; + + memset(&cmd, 0, sizeof(cmd)); + page_list = dma_alloc_coherent(hdev, size, &page_list_bus, 
GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + for (i = 0; i < num_entries; i++) + page_list[i] = cpu_to_be64(qpl->page_buses[i]); + + cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST); + cmd.reg_page_list = (struct gve_adminq_register_page_list) { + .page_list_id = cpu_to_be32(qpl->id), + .num_pages = cpu_to_be32(num_entries), + .page_address_list_addr = cpu_to_be64(page_list_bus), + }; + + err = gve_adminq_execute_cmd(priv, &cmd); + dma_free_coherent(hdev, size, page_list, page_list_bus); + return err; +} + +int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST); + cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) { + .page_list_id = cpu_to_be32(page_list_id), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER); + cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) { + .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU), + .parameter_value = cpu_to_be64(mtu), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h new file mode 100644 index 000000000000..4dfa06edc0f8 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) + * Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. + */ + +#ifndef _GVE_ADMINQ_H +#define _GVE_ADMINQ_H + +#include <linux/build_bug.h> + +/* Admin queue opcodes */ +enum gve_adminq_opcodes { + GVE_ADMINQ_DESCRIBE_DEVICE = 0x1, + GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2, + GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3, + GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4, + GVE_ADMINQ_CREATE_TX_QUEUE = 0x5, + GVE_ADMINQ_CREATE_RX_QUEUE = 0x6, + GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, + GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, + GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, + GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, +}; + +/* Admin queue status codes */ +enum gve_adminq_statuses { + GVE_ADMINQ_COMMAND_UNSET = 0x0, + GVE_ADMINQ_COMMAND_PASSED = 0x1, + GVE_ADMINQ_COMMAND_ERROR_ABORTED = 0xFFFFFFF0, + GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS = 0xFFFFFFF1, + GVE_ADMINQ_COMMAND_ERROR_CANCELLED = 0xFFFFFFF2, + GVE_ADMINQ_COMMAND_ERROR_DATALOSS = 0xFFFFFFF3, + GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED = 0xFFFFFFF4, + GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION = 0xFFFFFFF5, + GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR = 0xFFFFFFF6, + GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT = 0xFFFFFFF7, + GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND = 0xFFFFFFF8, + GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE = 0xFFFFFFF9, + GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED = 0xFFFFFFFA, + GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED = 0xFFFFFFFB, + GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED = 0xFFFFFFFC, + GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE = 0xFFFFFFFD, + GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED = 0xFFFFFFFE, + GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR = 0xFFFFFFFF, +}; + +#define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1 + +/* All AdminQ command structs should be naturally packed. The static_assert + * calls make sure this is the case at compile time. 
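+ * As one example from this file, gve_adminq_create_rx_queue ends in an
+ * explicit u8 padding[4]; spelling the padding out keeps the 48-byte wire
+ * layout visible in the struct itself rather than leaving it to implicit
+ * trailing padding.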
+ */ + +struct gve_adminq_describe_device { + __be64 device_descriptor_addr; + __be32 device_descriptor_version; + __be32 available_length; +}; + +static_assert(sizeof(struct gve_adminq_describe_device) == 16); + +struct gve_device_descriptor { + __be64 max_registered_pages; + __be16 reserved1; + __be16 tx_queue_entries; + __be16 rx_queue_entries; + __be16 default_num_queues; + __be16 mtu; + __be16 counters; + __be16 tx_pages_per_qpl; + __be16 rx_pages_per_qpl; + u8 mac[ETH_ALEN]; + __be16 num_device_options; + __be16 total_length; + u8 reserved2[6]; +}; + +static_assert(sizeof(struct gve_device_descriptor) == 40); + +struct device_option { + __be32 option_id; + __be32 option_length; +}; + +static_assert(sizeof(struct device_option) == 8); + +struct gve_adminq_configure_device_resources { + __be64 counter_array; + __be64 irq_db_addr; + __be32 num_counters; + __be32 num_irq_dbs; + __be32 irq_db_stride; + __be32 ntfy_blk_msix_base_idx; +}; + +static_assert(sizeof(struct gve_adminq_configure_device_resources) == 32); + +struct gve_adminq_register_page_list { + __be32 page_list_id; + __be32 num_pages; + __be64 page_address_list_addr; +}; + +static_assert(sizeof(struct gve_adminq_register_page_list) == 16); + +struct gve_adminq_unregister_page_list { + __be32 page_list_id; +}; + +static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4); + +struct gve_adminq_create_tx_queue { + __be32 queue_id; + __be32 reserved; + __be64 queue_resources_addr; + __be64 tx_ring_addr; + __be32 queue_page_list_id; + __be32 ntfy_id; +}; + +static_assert(sizeof(struct gve_adminq_create_tx_queue) == 32); + +struct gve_adminq_create_rx_queue { + __be32 queue_id; + __be32 index; + __be32 reserved; + __be32 ntfy_id; + __be64 queue_resources_addr; + __be64 rx_desc_ring_addr; + __be64 rx_data_ring_addr; + __be32 queue_page_list_id; + u8 padding[4]; +}; + +static_assert(sizeof(struct gve_adminq_create_rx_queue) == 48); + +/* Queue resources that are shared with the device */ +struct gve_queue_resources { + union { + struct { + __be32 db_index; /* Device -> Guest */ + __be32 counter_index; /* Device -> Guest */ + }; + u8 reserved[64]; + }; +}; + +static_assert(sizeof(struct gve_queue_resources) == 64); + +struct gve_adminq_destroy_tx_queue { + __be32 queue_id; +}; + +static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4); + +struct gve_adminq_destroy_rx_queue { + __be32 queue_id; +}; + +static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4); + +/* GVE Set Driver Parameter Types */ +enum gve_set_driver_param_types { + GVE_SET_PARAM_MTU = 0x1, +}; + +struct gve_adminq_set_driver_parameter { + __be32 parameter_type; + u8 reserved[4]; + __be64 parameter_value; +}; + +static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16); + +union gve_adminq_command { + struct { + __be32 opcode; + __be32 status; + union { + struct gve_adminq_configure_device_resources + configure_device_resources; + struct gve_adminq_create_tx_queue create_tx_queue; + struct gve_adminq_create_rx_queue create_rx_queue; + struct gve_adminq_destroy_tx_queue destroy_tx_queue; + struct gve_adminq_destroy_rx_queue destroy_rx_queue; + struct gve_adminq_describe_device describe_device; + struct gve_adminq_register_page_list reg_page_list; + struct gve_adminq_unregister_page_list unreg_page_list; + struct gve_adminq_set_driver_parameter set_driver_param; + }; + }; + u8 reserved[64]; +}; + +static_assert(sizeof(union gve_adminq_command) == 64); + +int gve_adminq_alloc(struct device *dev, struct gve_priv *priv); +void 
gve_adminq_free(struct device *dev, struct gve_priv *priv); +void gve_adminq_release(struct gve_priv *priv); +int gve_adminq_execute_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd_orig); +int gve_adminq_describe_device(struct gve_priv *priv); +int gve_adminq_configure_device_resources(struct gve_priv *priv, + dma_addr_t counter_array_bus_addr, + u32 num_counters, + dma_addr_t db_array_bus_addr, + u32 num_ntfy_blks); +int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); +int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_register_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl); +int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); +int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); +#endif /* _GVE_ADMINQ_H */ diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h new file mode 100644 index 000000000000..54779871d52e --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_desc.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) + * Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. + */ + +/* GVE Transmit Descriptor formats */ + +#ifndef _GVE_DESC_H_ +#define _GVE_DESC_H_ + +#include <linux/build_bug.h> + +/* A note on seg_addrs + * + * Base addresses encoded in seg_addr are not assumed to be physical + * addresses. The ring format assumes these come from some linear address + * space. This could be physical memory, kernel virtual memory, user virtual + * memory. gVNIC uses lists of registered pages. Each queue is assumed + * to be associated with a single such linear address space to ensure a + * consistent meaning for seg_addrs posted to its rings. + */ + +struct gve_tx_pkt_desc { + u8 type_flags; /* desc type is lower 4 bits, flags upper */ + u8 l4_csum_offset; /* relative offset of L4 csum word */ + u8 l4_hdr_offset; /* Offset of start of L4 headers in packet */ + u8 desc_cnt; /* Total descriptors for this packet */ + __be16 len; /* Total length of this packet (in bytes) */ + __be16 seg_len; /* Length of this descriptor's segment */ + __be64 seg_addr; /* Base address (see note) of this segment */ +} __packed; + +struct gve_tx_seg_desc { + u8 type_flags; /* type is lower 4 bits, flags upper */ + u8 l3_offset; /* TSO: 2 byte units to start of IPH */ + __be16 reserved; + __be16 mss; /* TSO MSS */ + __be16 seg_len; + __be64 seg_addr; +} __packed; + +/* GVE Transmit Descriptor Types */ +#define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */ +#define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */ +#define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */ + +/* GVE Transmit Descriptor Flags for Std Pkts */ +#define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */ +#define GVE_TXF_TSTAMP BIT(2) /* Timestamp required */ + +/* GVE Transmit Descriptor Flags for TSO Segs */ +#define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */ + +/* GVE Receive Packet Descriptor */ +/* The start of an ethernet packet comes 2 bytes into the rx buffer. + * gVNIC adds this padding so that both the DMA and the L3/4 protocol header + * access is aligned. 
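+ * With the 2-byte pad, the 14-byte Ethernet header ends at offset 16, so
+ * the L3 header that follows starts on a 4-byte boundary.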
+ */
+#define GVE_RX_PAD 2
+
+struct gve_rx_desc {
+ u8 padding[48];
+ __be32 rss_hash; /* Receive-side scaling hash (Toeplitz for gVNIC) */
+ __be16 mss;
+ __be16 reserved; /* Reserved to zero */
+ u8 hdr_len; /* Header length (L2-L4) including padding */
+ u8 hdr_off; /* 64-byte-scaled offset into RX_DATA entry */
+ __sum16 csum; /* 1's-complement partial checksum of L3+ bytes */
+ __be16 len; /* Length of the received packet */
+ __be16 flags_seq; /* Flags [15:3] and sequence number [2:0] (1-7) */
+} __packed;
+static_assert(sizeof(struct gve_rx_desc) == 64);
+
+/* As with the Tx ring format, the qpl_offset entries below are offsets into an
+ * ordered list of registered pages.
+ */
+struct gve_rx_data_slot {
+ /* byte offset into the rx registered segment of this slot */
+ __be64 qpl_offset;
+};
+
+/* GVE Receive Packet Descriptor Seq No */
+#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
+
+/* GVE Receive Packet Descriptor Flags */
+#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
+#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
+#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
+#define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */
+#define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */
+#define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */
+#define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */
+
+/* GVE IRQ */
+#define GVE_IRQ_ACK BIT(31)
+#define GVE_IRQ_MASK BIT(30)
+#define GVE_IRQ_EVENT BIT(29)
+
+static inline bool gve_needs_rss(__be16 flag)
+{
+ if (flag & GVE_RXF_FRAG)
+ return false;
+ if (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
+ return true;
+ return false;
+}
+
+static inline u8 gve_next_seqno(u8 seq)
+{
+ return (seq + 1) == 8 ? 1 : seq + 1;
+}
+#endif /* _GVE_DESC_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
new file mode 100644
index 000000000000..26540b856541
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */ + +#include <linux/rtnetlink.h> +#include "gve.h" + +static void gve_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct gve_priv *priv = netdev_priv(netdev); + + strlcpy(info->driver, "gve", sizeof(info->driver)); + strlcpy(info->version, gve_version_str, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); +} + +static void gve_set_msglevel(struct net_device *netdev, u32 value) +{ + struct gve_priv *priv = netdev_priv(netdev); + + priv->msg_enable = value; +} + +static u32 gve_get_msglevel(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->msg_enable; +} + +static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { + "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", + "rx_dropped", "tx_dropped", "tx_timeouts", +}; + +#define GVE_MAIN_STATS_LEN ARRAY_SIZE(gve_gstrings_main_stats) +#define NUM_GVE_TX_CNTS 5 +#define NUM_GVE_RX_CNTS 2 + +static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct gve_priv *priv = netdev_priv(netdev); + char *s = (char *)data; + int i; + + if (stringset != ETH_SS_STATS) + return; + + memcpy(s, *gve_gstrings_main_stats, + sizeof(gve_gstrings_main_stats)); + s += sizeof(gve_gstrings_main_stats); + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + snprintf(s, ETH_GSTRING_LEN, "rx_desc_cnt[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "rx_desc_fill_cnt[%u]", i); + s += ETH_GSTRING_LEN; + } + for (i = 0; i < priv->tx_cfg.num_queues; i++) { + snprintf(s, ETH_GSTRING_LEN, "tx_req[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_done[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_wake[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_stop[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_event_counter[%u]", i); + s += ETH_GSTRING_LEN; + } +} + +static int gve_get_sset_count(struct net_device *netdev, int sset) +{ + struct gve_priv *priv = netdev_priv(netdev); + + switch (sset) { + case ETH_SS_STATS: + return GVE_MAIN_STATS_LEN + + (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) + + (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS); + default: + return -EOPNOTSUPP; + } +} + +static void +gve_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct gve_priv *priv = netdev_priv(netdev); + u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes; + unsigned int start; + int ring; + int i; + + ASSERT_RTNL(); + + for (rx_pkts = 0, rx_bytes = 0, ring = 0; + ring < priv->rx_cfg.num_queues; ring++) { + if (priv->rx) { + do { + start = + u64_stats_fetch_begin(&priv->rx[ring].statss); + rx_pkts += priv->rx[ring].rpackets; + rx_bytes += priv->rx[ring].rbytes; + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + } + } + for (tx_pkts = 0, tx_bytes = 0, ring = 0; + ring < priv->tx_cfg.num_queues; ring++) { + if (priv->tx) { + do { + start = + u64_stats_fetch_begin(&priv->tx[ring].statss); + tx_pkts += priv->tx[ring].pkt_done; + tx_bytes += priv->tx[ring].bytes_done; + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + } + } + + i = 0; + data[i++] = rx_pkts; + data[i++] = tx_pkts; + data[i++] = rx_bytes; + data[i++] = tx_bytes; + /* Skip rx_dropped and tx_dropped */ + i += 2; + data[i++] = priv->tx_timeo_cnt; + i = GVE_MAIN_STATS_LEN; + + /* walk RX rings */ + if (priv->rx) { + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + struct gve_rx_ring *rx = 
&priv->rx[ring];
+
+ data[i++] = rx->desc.cnt;
+ data[i++] = rx->desc.fill_cnt;
+ }
+ } else {
+ i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
+ }
+ /* walk TX rings */
+ if (priv->tx) {
+ for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ struct gve_tx_ring *tx = &priv->tx[ring];
+
+ data[i++] = tx->req;
+ data[i++] = tx->done;
+ data[i++] = tx->wake_queue;
+ data[i++] = tx->stop_queue;
+ data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
+ tx));
+ }
+ } else {
+ i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
+ }
+}
+
+static void gve_get_channels(struct net_device *netdev,
+ struct ethtool_channels *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ cmd->max_rx = priv->rx_cfg.max_queues;
+ cmd->max_tx = priv->tx_cfg.max_queues;
+ cmd->max_other = 0;
+ cmd->max_combined = 0;
+ cmd->rx_count = priv->rx_cfg.num_queues;
+ cmd->tx_count = priv->tx_cfg.num_queues;
+ cmd->other_count = 0;
+ cmd->combined_count = 0;
+}
+
+static int gve_set_channels(struct net_device *netdev,
+ struct ethtool_channels *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ struct gve_queue_config new_tx_cfg = priv->tx_cfg;
+ struct gve_queue_config new_rx_cfg = priv->rx_cfg;
+ struct ethtool_channels old_settings;
+ int new_tx = cmd->tx_count;
+ int new_rx = cmd->rx_count;
+
+ gve_get_channels(netdev, &old_settings);
+
+ /* Changing combined is not allowed */
+ if (cmd->combined_count != old_settings.combined_count)
+ return -EINVAL;
+
+ if (!new_rx || !new_tx)
+ return -EINVAL;
+
+ if (!netif_carrier_ok(netdev)) {
+ priv->tx_cfg.num_queues = new_tx;
+ priv->rx_cfg.num_queues = new_rx;
+ return 0;
+ }
+
+ new_tx_cfg.num_queues = new_tx;
+ new_rx_cfg.num_queues = new_rx;
+
+ return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg);
+}
+
+static void gve_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ cmd->rx_max_pending = priv->rx_desc_cnt;
+ cmd->tx_max_pending = priv->tx_desc_cnt;
+ cmd->rx_pending = priv->rx_desc_cnt;
+ cmd->tx_pending = priv->tx_desc_cnt;
+}
+
+static int gve_user_reset(struct net_device *netdev, u32 *flags)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ if (*flags == ETH_RESET_ALL) {
+ *flags = 0;
+ return gve_reset(priv, true);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+const struct ethtool_ops gve_ethtool_ops = {
+ .get_drvinfo = gve_get_drvinfo,
+ .get_strings = gve_get_strings,
+ .get_sset_count = gve_get_sset_count,
+ .get_ethtool_stats = gve_get_ethtool_stats,
+ .set_msglevel = gve_set_msglevel,
+ .get_msglevel = gve_get_msglevel,
+ .set_channels = gve_set_channels,
+ .get_channels = gve_get_channels,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = gve_get_ringparam,
+ .reset = gve_user_reset,
+};
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
new file mode 100644
index 000000000000..24f16e3368cd
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -0,0 +1,1232 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <net/sch_generic.h>
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_register.h"
+
+#define GVE_DEFAULT_RX_COPYBREAK (256)
+
+#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
+#define GVE_VERSION "1.0.0"
+#define GVE_VERSION_PREFIX "GVE-"
+
+const char gve_version_str[] = GVE_VERSION;
+static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
+
+static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ unsigned int start;
+ int ring;
+
+ if (priv->rx) {
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->rx[ring].statss);
+ s->rx_packets += priv->rx[ring].rpackets;
+ s->rx_bytes += priv->rx[ring].rbytes;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ }
+ }
+ if (priv->tx) {
+ for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->tx[ring].statss);
+ s->tx_packets += priv->tx[ring].pkt_done;
+ s->tx_bytes += priv->tx[ring].bytes_done;
+ } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+ start));
+ }
+ }
+}
+
+static int gve_alloc_counter_array(struct gve_priv *priv)
+{
+ priv->counter_array =
+ dma_alloc_coherent(&priv->pdev->dev,
+ priv->num_event_counters *
+ sizeof(*priv->counter_array),
+ &priv->counter_array_bus, GFP_KERNEL);
+ if (!priv->counter_array)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void gve_free_counter_array(struct gve_priv *priv)
+{
+ dma_free_coherent(&priv->pdev->dev,
+ priv->num_event_counters *
+ sizeof(*priv->counter_array),
+ priv->counter_array, priv->counter_array_bus);
+ priv->counter_array = NULL;
+}
+
+static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
+{
+ struct gve_priv *priv = arg;
+
+ queue_work(priv->gve_wq, &priv->service_task);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t gve_intr(int irq, void *arg)
+{
+ struct gve_notify_block *block = arg;
+ struct gve_priv *priv = block->priv;
+
+ iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
+ napi_schedule_irqoff(&block->napi);
+ return IRQ_HANDLED;
+}
+
+static int gve_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct gve_notify_block *block;
+ __be32 __iomem *irq_doorbell;
+ bool reschedule = false;
+ struct gve_priv *priv;
+
+ block = container_of(napi, struct gve_notify_block, napi);
+ priv = block->priv;
+
+ if (block->tx)
+ reschedule |= gve_tx_poll(block, budget);
+ if (block->rx)
+ reschedule |= gve_rx_poll(block, budget);
+
+ if (reschedule)
+ return budget;
+
+ napi_complete(napi);
+ irq_doorbell = gve_irq_doorbell(priv, block);
+ iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
+
+ /* Double check we have no extra work.
+ * Ensure unmask synchronizes with checking for work.
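+ * Work that arrived while the interrupt was masked would not raise a
+ * fresh interrupt, so it has to be picked up by this final poll.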
+ */ + dma_rmb(); + if (block->tx) + reschedule |= gve_tx_poll(block, -1); + if (block->rx) + reschedule |= gve_rx_poll(block, -1); + if (reschedule && napi_reschedule(napi)) + iowrite32be(GVE_IRQ_MASK, irq_doorbell); + + return 0; +} + +static int gve_alloc_notify_blocks(struct gve_priv *priv) +{ + int num_vecs_requested = priv->num_ntfy_blks + 1; + char *name = priv->dev->name; + unsigned int active_cpus; + int vecs_enabled; + int i, j; + int err; + + priv->msix_vectors = kvzalloc(num_vecs_requested * + sizeof(*priv->msix_vectors), GFP_KERNEL); + if (!priv->msix_vectors) + return -ENOMEM; + for (i = 0; i < num_vecs_requested; i++) + priv->msix_vectors[i].entry = i; + vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, + GVE_MIN_MSIX, num_vecs_requested); + if (vecs_enabled < 0) { + dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", + GVE_MIN_MSIX, vecs_enabled); + err = vecs_enabled; + goto abort_with_msix_vectors; + } + if (vecs_enabled != num_vecs_requested) { + int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; + int vecs_per_type = new_num_ntfy_blks / 2; + int vecs_left = new_num_ntfy_blks % 2; + + priv->num_ntfy_blks = new_num_ntfy_blks; + priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, + vecs_per_type); + priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, + vecs_per_type + vecs_left); + dev_err(&priv->pdev->dev, + "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", + vecs_enabled, priv->tx_cfg.max_queues, + priv->rx_cfg.max_queues); + if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) + priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; + if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) + priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; + } + /* Half the notification blocks go to TX and half to RX */ + active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); + + /* Setup Management Vector - the last vector */ + snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt", + name); + err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, + gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); + if (err) { + dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); + goto abort_with_msix_enabled; + } + priv->ntfy_blocks = + dma_alloc_coherent(&priv->pdev->dev, + priv->num_ntfy_blks * + sizeof(*priv->ntfy_blocks), + &priv->ntfy_block_bus, GFP_KERNEL); + if (!priv->ntfy_blocks) { + err = -ENOMEM; + goto abort_with_mgmt_vector; + } + /* Setup the other blocks - the first n-1 vectors */ + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + int msix_idx = i; + + snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d", + name, i); + block->priv = priv; + err = request_irq(priv->msix_vectors[msix_idx].vector, + gve_intr, 0, block->name, block); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to receive msix vector %d\n", i); + goto abort_with_some_ntfy_blocks; + } + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + get_cpu_mask(i % active_cpus)); + } + return 0; +abort_with_some_ntfy_blocks: + for (j = 0; j < i; j++) { + struct gve_notify_block *block = &priv->ntfy_blocks[j]; + int msix_idx = j; + + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + NULL); + free_irq(priv->msix_vectors[msix_idx].vector, block); + } + dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * + sizeof(*priv->ntfy_blocks), + priv->ntfy_blocks, priv->ntfy_block_bus); + 
priv->ntfy_blocks = NULL;
+abort_with_mgmt_vector:
+ free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+abort_with_msix_enabled:
+ pci_disable_msix(priv->pdev);
+abort_with_msix_vectors:
+ kvfree(priv->msix_vectors);
+ priv->msix_vectors = NULL;
+ return err;
+}
+
+static void gve_free_notify_blocks(struct gve_priv *priv)
+{
+ int i;
+
+ /* Free the irqs */
+ for (i = 0; i < priv->num_ntfy_blks; i++) {
+ struct gve_notify_block *block = &priv->ntfy_blocks[i];
+ int msix_idx = i;
+
+ irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+ NULL);
+ free_irq(priv->msix_vectors[msix_idx].vector, block);
+ }
+ dma_free_coherent(&priv->pdev->dev,
+ priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
+ priv->ntfy_blocks, priv->ntfy_block_bus);
+ priv->ntfy_blocks = NULL;
+ free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+ pci_disable_msix(priv->pdev);
+ kvfree(priv->msix_vectors);
+ priv->msix_vectors = NULL;
+}
+
+static int gve_setup_device_resources(struct gve_priv *priv)
+{
+ int err;
+
+ err = gve_alloc_counter_array(priv);
+ if (err)
+ return err;
+ err = gve_alloc_notify_blocks(priv);
+ if (err)
+ goto abort_with_counter;
+ err = gve_adminq_configure_device_resources(priv,
+ priv->counter_array_bus,
+ priv->num_event_counters,
+ priv->ntfy_block_bus,
+ priv->num_ntfy_blks);
+ if (unlikely(err)) {
+ dev_err(&priv->pdev->dev,
+ "could not setup device_resources: err=%d\n", err);
+ err = -ENXIO;
+ goto abort_with_ntfy_blocks;
+ }
+ gve_set_device_resources_ok(priv);
+ return 0;
+abort_with_ntfy_blocks:
+ gve_free_notify_blocks(priv);
+abort_with_counter:
+ gve_free_counter_array(priv);
+ return err;
+}
+
+static void gve_trigger_reset(struct gve_priv *priv);
+
+static void gve_teardown_device_resources(struct gve_priv *priv)
+{
+ int err;
+
+ /* Tell device its resources are being freed */
+ if (gve_get_device_resources_ok(priv)) {
+ err = gve_adminq_deconfigure_device_resources(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Could not deconfigure device resources: err=%d\n",
+ err);
+ gve_trigger_reset(priv);
+ }
+ }
+ gve_free_counter_array(priv);
+ gve_free_notify_blocks(priv);
+ gve_clear_device_resources_ok(priv);
+}
+
+static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
+ NAPI_POLL_WEIGHT);
+}
+
+static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_del(&block->napi);
+}
+
+static int gve_register_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int err;
+ int i;
+
+ for (i = 0; i < num_qpls; i++) {
+ err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int gve_unregister_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int err;
+ int i;
+
+ for (i = 0; i < num_qpls; i++) {
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int gve_create_rings(struct gve_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ err = gve_adminq_create_tx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
+ }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ err = gve_adminq_create_rx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ /* Rx data ring has been prefilled with packet buffers at
+ * queue allocation time.
+ * Write the doorbell to provide descriptor slots and packet
+ * buffers to the NIC.
+ */
+ gve_rx_write_doorbell(priv, &priv->rx[i]);
+ netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
+ }
+
+ return 0;
+}
+
+static int gve_alloc_rings(struct gve_priv *priv)
+{
+ int ntfy_idx;
+ int err;
+ int i;
+
+ /* Setup tx rings */
+ priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
+ GFP_KERNEL);
+ if (!priv->tx)
+ return -ENOMEM;
+ err = gve_tx_alloc_rings(priv);
+ if (err)
+ goto free_tx;
+ /* Setup rx rings */
+ priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
+ GFP_KERNEL);
+ if (!priv->rx) {
+ err = -ENOMEM;
+ goto free_tx_queue;
+ }
+ err = gve_rx_alloc_rings(priv);
+ if (err)
+ goto free_rx;
+ /* Add tx napi & init sync stats */
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ u64_stats_init(&priv->tx[i].statss);
+ ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+ gve_add_napi(priv, ntfy_idx);
+ }
+ /* Add rx napi & init sync stats */
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ u64_stats_init(&priv->rx[i].statss);
+ ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ gve_add_napi(priv, ntfy_idx);
+ }
+
+ return 0;
+
+free_rx:
+ kvfree(priv->rx);
+ priv->rx = NULL;
+free_tx_queue:
+ gve_tx_free_rings(priv);
+free_tx:
+ kvfree(priv->tx);
+ priv->tx = NULL;
+ return err;
+}
+
+static int gve_destroy_rings(struct gve_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ err = gve_adminq_destroy_tx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy tx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
+ }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ err = gve_adminq_destroy_rx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy rx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
+ }
+ return 0;
+}
+
+static void gve_free_rings(struct gve_priv *priv)
+{
+ int ntfy_idx;
+ int i;
+
+ if (priv->tx) {
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+ gve_remove_napi(priv, ntfy_idx);
+ }
+ gve_tx_free_rings(priv);
+ kvfree(priv->tx);
+ priv->tx = NULL;
+ }
+ if (priv->rx) {
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ gve_remove_napi(priv, ntfy_idx);
+ }
+ gve_rx_free_rings(priv);
+ kvfree(priv->rx);
+ priv->rx = NULL;
+ }
+}
+
+int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
+ enum dma_data_direction dir)
+{
+ *page = alloc_page(GFP_KERNEL);
+ if (!*page)
+ return -ENOMEM;
+ *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
+ if (dma_mapping_error(dev, *dma)) {
+ put_page(*page);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
+ int pages)
+{
+ struct gve_queue_page_list *qpl = &priv->qpls[id];
+ int err;
+ int i;
+
+ if (pages + priv->num_registered_pages > priv->max_registered_pages) {
+ netif_err(priv, drv, priv->dev,
+ "Reached max number of registered pages %llu > %llu\n",
+ pages + priv->num_registered_pages,
+ priv->max_registered_pages);
+ return -EINVAL;
+ }
+
+ qpl->id = id;
+ qpl->num_entries = pages;
+ qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
+ /* caller handles clean up */
+ if (!qpl->pages)
+ return -ENOMEM;
+ qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
+ GFP_KERNEL);
+ /* caller handles clean up */
+ if (!qpl->page_buses)
+ return -ENOMEM;
+
+ for (i = 0; i < pages; i++) {
+ err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
+ &qpl->page_buses[i],
+ gve_qpl_dma_dir(priv, id));
+ /* caller handles clean up */
+ if (err)
+ return -ENOMEM;
+ }
+ priv->num_registered_pages += pages;
+
+ return 0;
+}
+
+void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+ enum dma_data_direction dir)
+{
+ if (!dma_mapping_error(dev, dma))
+ dma_unmap_page(dev, dma, PAGE_SIZE, dir);
+ if (page)
+ put_page(page);
+}
+
+static void gve_free_queue_page_list(struct gve_priv *priv,
+ int id)
+{
+ struct gve_queue_page_list *qpl = &priv->qpls[id];
+ int i;
+
+ if (!qpl->pages)
+ return;
+ if (!qpl->page_buses)
+ goto free_pages;
+
+ for (i = 0; i < qpl->num_entries; i++)
+ gve_free_page(&priv->pdev->dev, qpl->pages[i],
+ qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
+
+ kvfree(qpl->page_buses);
+free_pages:
+ kvfree(qpl->pages);
+ priv->num_registered_pages -= qpl->num_entries;
+}
+
+static int gve_alloc_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int i, j;
+ int err;
+
+ priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
+ if (!priv->qpls)
+ return -ENOMEM;
+
+ for (i = 0; i < gve_num_tx_qpls(priv); i++) {
+ err = gve_alloc_queue_page_list(priv, i,
+ priv->tx_pages_per_qpl);
+ if (err)
+ goto free_qpls;
+ }
+ for (; i < num_qpls; i++) {
+ err = gve_alloc_queue_page_list(priv, i,
+ priv->rx_pages_per_qpl);
+ if (err)
+ goto free_qpls;
+ }
+
+ priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
+ sizeof(unsigned long) * BITS_PER_BYTE;
+ priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!priv->qpl_cfg.qpl_id_map) {
+ err = -ENOMEM;
+ goto free_qpls;
+ }
+
+ return 0;
+
+free_qpls:
+ for (j = 0; j <= i && j < num_qpls; j++)
+ gve_free_queue_page_list(priv, j);
+ kvfree(priv->qpls);
+ return err;
+}
+
+static void gve_free_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int i;
+
+ kvfree(priv->qpl_cfg.qpl_id_map);
+
+ for (i = 0; i < num_qpls; i++)
+ gve_free_queue_page_list(priv, i);
+
+ kvfree(priv->qpls);
+}
+
+/* Use this to schedule a reset when the device is capable of continuing
+ * to handle other requests in its current state. If it is not, do a reset
+ * in thread instead.
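+ * The Tx watchdog (gve_tx_timeout) takes this path; paths that cannot
+ * wait for the service task, such as a failed gve_open, call
+ * gve_reset_and_teardown directly.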
+ */ +void gve_schedule_reset(struct gve_priv *priv) +{ + gve_set_do_reset(priv); + queue_work(priv->gve_wq, &priv->service_task); +} + +static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); +static int gve_reset_recovery(struct gve_priv *priv, bool was_up); +static void gve_turndown(struct gve_priv *priv); +static void gve_turnup(struct gve_priv *priv); + +static int gve_open(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + err = gve_alloc_qpls(priv); + if (err) + return err; + err = gve_alloc_rings(priv); + if (err) + goto free_qpls; + + err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); + if (err) + goto free_rings; + err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); + if (err) + goto free_rings; + + err = gve_register_qpls(priv); + if (err) + goto reset; + err = gve_create_rings(priv); + if (err) + goto reset; + gve_set_device_rings_ok(priv); + + gve_turnup(priv); + netif_carrier_on(dev); + return 0; + +free_rings: + gve_free_rings(priv); +free_qpls: + gve_free_qpls(priv); + return err; + +reset: + /* This must have been called from a reset due to the rtnl lock + * so just return at this point. + */ + if (gve_get_reset_in_progress(priv)) + return err; + /* Otherwise reset before returning */ + gve_reset_and_teardown(priv, true); + /* if this fails there is nothing we can do so just ignore the return */ + gve_reset_recovery(priv, false); + /* return the original error */ + return err; +} + +static int gve_close(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + netif_carrier_off(dev); + if (gve_get_device_rings_ok(priv)) { + gve_turndown(priv); + err = gve_destroy_rings(priv); + if (err) + goto err; + err = gve_unregister_qpls(priv); + if (err) + goto err; + gve_clear_device_rings_ok(priv); + } + + gve_free_rings(priv); + gve_free_qpls(priv); + return 0; + +err: + /* This must have been called from a reset due to the rtnl lock + * so just return at this point. + */ + if (gve_get_reset_in_progress(priv)) + return err; + /* Otherwise reset before returning */ + gve_reset_and_teardown(priv, true); + return gve_reset_recovery(priv, false); +} + +int gve_adjust_queues(struct gve_priv *priv, + struct gve_queue_config new_rx_config, + struct gve_queue_config new_tx_config) +{ + int err; + + if (netif_carrier_ok(priv->dev)) { + /* To make this process as simple as possible we teardown the + * device, set the new configuration, and then bring the device + * up again. + */ + err = gve_close(priv->dev); + /* we have already tried to reset in close, + * just fail at this point + */ + if (err) + return err; + priv->tx_cfg = new_tx_config; + priv->rx_cfg = new_rx_config; + + err = gve_open(priv->dev); + if (err) + goto err; + + return 0; + } + /* Set the config for the next up. */ + priv->tx_cfg = new_tx_config; + priv->rx_cfg = new_rx_config; + + return 0; +err: + netif_err(priv, drv, priv->dev, + "Adjust queues failed! !!! 
DISABLING ALL QUEUES !!!\n"); + gve_turndown(priv); + return err; +} + +static void gve_turndown(struct gve_priv *priv) +{ + int idx; + + if (netif_carrier_ok(priv->dev)) + netif_carrier_off(priv->dev); + + if (!gve_get_napi_enabled(priv)) + return; + + /* Disable napi to prevent more work from coming in */ + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_disable(&block->napi); + } + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_disable(&block->napi); + } + + /* Stop tx queues */ + netif_tx_disable(priv->dev); + + gve_clear_napi_enabled(priv); +} + +static void gve_turnup(struct gve_priv *priv) +{ + int idx; + + /* Start the tx queues */ + netif_tx_start_all_queues(priv->dev); + + /* Enable napi and unmask interrupts for all queues */ + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_enable(&block->napi); + iowrite32be(0, gve_irq_doorbell(priv, block)); + } + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_enable(&block->napi); + iowrite32be(0, gve_irq_doorbell(priv, block)); + } + + gve_set_napi_enabled(priv); +} + +static void gve_tx_timeout(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + + gve_schedule_reset(priv); + priv->tx_timeo_cnt++; +} + +static const struct net_device_ops gve_netdev_ops = { + .ndo_start_xmit = gve_tx, + .ndo_open = gve_open, + .ndo_stop = gve_close, + .ndo_get_stats64 = gve_get_stats, + .ndo_tx_timeout = gve_tx_timeout, +}; + +static void gve_handle_status(struct gve_priv *priv, u32 status) +{ + if (GVE_DEVICE_STATUS_RESET_MASK & status) { + dev_info(&priv->pdev->dev, "Device requested reset.\n"); + gve_set_do_reset(priv); + } +} + +static void gve_handle_reset(struct gve_priv *priv) +{ + /* A service task will be scheduled at the end of probe to catch any + * resets that need to happen, and we don't want to reset until + * probe is done. 
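+ * gve_probe queues the service task just before returning, so a reset
+ * deferred here still runs once probing completes.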
+ */
+ if (gve_get_probe_in_progress(priv))
+ return;
+
+ if (gve_get_do_reset(priv)) {
+ rtnl_lock();
+ gve_reset(priv, false);
+ rtnl_unlock();
+ }
+}
+
+/* Handle NIC status register changes and reset requests */
+static void gve_service_task(struct work_struct *work)
+{
+ struct gve_priv *priv = container_of(work, struct gve_priv,
+ service_task);
+
+ gve_handle_status(priv,
+ ioread32be(&priv->reg_bar0->device_status));
+
+ gve_handle_reset(priv);
+}
+
+static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
+{
+ int num_ntfy;
+ int err;
+
+ /* Set up the adminq */
+ err = gve_adminq_alloc(&priv->pdev->dev, priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Failed to alloc admin queue: err=%d\n", err);
+ return err;
+ }
+
+ if (skip_describe_device)
+ goto setup_device;
+
+ /* Get the initial information we need from the device */
+ err = gve_adminq_describe_device(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Could not get device information: err=%d\n", err);
+ goto err;
+ }
+ if (priv->dev->max_mtu > PAGE_SIZE) {
+ priv->dev->max_mtu = PAGE_SIZE;
+ err = gve_adminq_set_mtu(priv, priv->dev->mtu);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "Could not set mtu\n");
+ goto err;
+ }
+ }
+ priv->dev->mtu = priv->dev->max_mtu;
+ num_ntfy = pci_msix_vec_count(priv->pdev);
+ if (num_ntfy <= 0) {
+ dev_err(&priv->pdev->dev,
+ "could not count MSI-x vectors: err=%d\n", num_ntfy);
+ err = num_ntfy;
+ goto err;
+ } else if (num_ntfy < GVE_MIN_MSIX) {
+ dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
+ GVE_MIN_MSIX, num_ntfy);
+ err = -EINVAL;
+ goto err;
+ }
+
+ priv->num_registered_pages = 0;
+ priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
+ /* gvnic has one Notification Block per MSI-x vector, except for the
+ * management vector
+ */
+ priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
+ priv->mgmt_msix_idx = priv->num_ntfy_blks;
+
+ priv->tx_cfg.max_queues =
+ min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
+ priv->rx_cfg.max_queues =
+ min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
+
+ priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
+ priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
+ if (priv->default_num_queues > 0) {
+ priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
+ priv->tx_cfg.num_queues);
+ priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
+ priv->rx_cfg.num_queues);
+ }
+
+ netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
+ priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
+ netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
+ priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
+
+setup_device:
+ err = gve_setup_device_resources(priv);
+ if (!err)
+ return 0;
+err:
+ gve_adminq_free(&priv->pdev->dev, priv);
+ return err;
+}
+
+static void gve_teardown_priv_resources(struct gve_priv *priv)
+{
+ gve_teardown_device_resources(priv);
+ gve_adminq_free(&priv->pdev->dev, priv);
+}
+
+static void gve_trigger_reset(struct gve_priv *priv)
+{
+ /* Reset the device by releasing the AQ */
+ gve_adminq_release(priv);
+}
+
+static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
+{
+ gve_trigger_reset(priv);
+ /* With the reset having already happened, close cannot fail */
+ if (was_up)
+ gve_close(priv->dev);
+ gve_teardown_priv_resources(priv);
+}
+
+static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
+{
+ int err;
+
+ err = gve_init_priv(priv, true);
+ if (err)
+ goto err;
+ if (was_up) {
+ err = gve_open(priv->dev);
+ if (err)
+ goto err;
+ }
+ return 0;
+err:
+ dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
+ gve_turndown(priv);
+ return err;
+}
+
+int gve_reset(struct gve_priv *priv, bool attempt_teardown)
+{
+ bool was_up = netif_carrier_ok(priv->dev);
+ int err;
+
+ dev_info(&priv->pdev->dev, "Performing reset\n");
+ gve_clear_do_reset(priv);
+ gve_set_reset_in_progress(priv);
+ /* If we aren't attempting to teardown normally, just go turndown and
+ * reset right away.
+ */
+ if (!attempt_teardown) {
+ gve_turndown(priv);
+ gve_reset_and_teardown(priv, was_up);
+ } else {
+ /* Otherwise attempt to close normally */
+ if (was_up) {
+ err = gve_close(priv->dev);
+ /* If that fails reset as we did above */
+ if (err)
+ gve_reset_and_teardown(priv, was_up);
+ }
+ /* Clean up any remaining resources */
+ gve_teardown_priv_resources(priv);
+ }
+
+ /* Set it all back up */
+ err = gve_reset_recovery(priv, was_up);
+ gve_clear_reset_in_progress(priv);
+ return err;
+}
+
+static void gve_write_version(u8 __iomem *driver_version_register)
+{
+ const char *c = gve_version_prefix;
+
+ while (*c) {
+ writeb(*c, driver_version_register);
+ c++;
+ }
+
+ c = gve_version_str;
+ while (*c) {
+ writeb(*c, driver_version_register);
+ c++;
+ }
+ writeb('\n', driver_version_register);
+}
+
+static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int max_tx_queues, max_rx_queues;
+ struct net_device *dev;
+ __be32 __iomem *db_bar;
+ struct gve_registers __iomem *reg_bar;
+ struct gve_priv *priv;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err)
+ return -ENXIO;
+
+ err = pci_request_regions(pdev, "gvnic-cfg");
+ if (err)
+ goto abort_with_enabled;
+
+ pci_set_master(pdev);
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
+ goto abort_with_pci_region;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev,
+ "Failed to set consistent dma mask: err=%d\n", err);
+ goto abort_with_pci_region;
+ }
+
+ reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
+ if (!reg_bar) {
+ dev_err(&pdev->dev, "Failed to map pci bar!\n");
+ err = -ENOMEM;
+ goto abort_with_pci_region;
+ }
+
+ db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
+ if (!db_bar) {
+ dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
+ err = -ENOMEM;
+ goto abort_with_reg_bar;
+ }
+
+ gve_write_version(&reg_bar->driver_version);
+ /* Get max queues to alloc etherdev */
+ max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
+ max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
+ /* Alloc and setup the netdev and priv */
+ dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
+ if (!dev) {
+ dev_err(&pdev->dev, "could not allocate netdev\n");
+ err = -ENOMEM;
+ goto abort_with_db_bar;
+ }
+ SET_NETDEV_DEV(dev, &pdev->dev);
+ pci_set_drvdata(pdev, dev);
+ dev->ethtool_ops = &gve_ethtool_ops;
+ dev->netdev_ops = &gve_netdev_ops;
+ /* advertise features */
+ dev->hw_features = NETIF_F_HIGHDMA;
+ dev->hw_features |= NETIF_F_SG;
+ dev->hw_features |= NETIF_F_HW_CSUM;
+ dev->hw_features |= NETIF_F_TSO;
+ dev->hw_features |= NETIF_F_TSO6;
+ dev->hw_features |= NETIF_F_TSO_ECN;
+ dev->hw_features |= NETIF_F_RXCSUM;
+ dev->hw_features |= NETIF_F_RXHASH;
+ dev->features = dev->hw_features;
+ dev->watchdog_timeo = 5 * HZ;
+ dev->min_mtu = ETH_MIN_MTU;
+ netif_carrier_off(dev);
+
+ priv = netdev_priv(dev);
+ priv->dev = dev;
+ priv->pdev = pdev;
+ priv->msg_enable = DEFAULT_MSG_LEVEL;
+ priv->reg_bar0 = reg_bar;
+ priv->db_bar2 = db_bar;
+ priv->service_task_flags = 0x0;
+ priv->state_flags = 0x0;
+
+ gve_set_probe_in_progress(priv);
+ priv->gve_wq = alloc_ordered_workqueue("gve", 0);
+ if (!priv->gve_wq) {
+ dev_err(&pdev->dev, "Could not allocate workqueue\n");
+ err = -ENOMEM;
+ goto abort_with_netdev;
+ }
+ INIT_WORK(&priv->service_task, gve_service_task);
+ priv->tx_cfg.max_queues = max_tx_queues;
+ priv->rx_cfg.max_queues = max_rx_queues;
+
+ err = gve_init_priv(priv, false);
+ if (err)
+ goto abort_with_wq;
+
+ err = register_netdev(dev);
+ if (err)
+ goto abort_with_wq;
+
+ dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
+ gve_clear_probe_in_progress(priv);
+ queue_work(priv->gve_wq, &priv->service_task);
+ return 0;
+
+abort_with_wq:
+ destroy_workqueue(priv->gve_wq);
+
+abort_with_netdev:
+ free_netdev(dev);
+
+abort_with_db_bar:
+ pci_iounmap(pdev, db_bar);
+
+abort_with_reg_bar:
+ pci_iounmap(pdev, reg_bar);
+
+abort_with_pci_region:
+ pci_release_regions(pdev);
+
+abort_with_enabled:
+ pci_disable_device(pdev);
+ return err;
+}
+
+static void gve_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct gve_priv *priv = netdev_priv(netdev);
+ __be32 __iomem *db_bar = priv->db_bar2;
+ void __iomem *reg_bar = priv->reg_bar0;
+
+ unregister_netdev(netdev);
+ gve_teardown_priv_resources(priv);
+ destroy_workqueue(priv->gve_wq);
+ free_netdev(netdev);
+ pci_iounmap(pdev, db_bar);
+ pci_iounmap(pdev, reg_bar);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static const struct pci_device_id gve_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
+ { }
+};
+
+static struct pci_driver gvnic_driver = {
+ .name = "gvnic",
+ .id_table = gve_id_table,
+ .probe = gve_probe,
+ .remove = gve_remove,
+};
+
+module_pci_driver(gvnic_driver);
+
+MODULE_DEVICE_TABLE(pci, gve_id_table);
+MODULE_AUTHOR("Google, Inc.");
+MODULE_DESCRIPTION("gVNIC Driver");
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_VERSION(GVE_VERSION);
diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h
new file mode 100644
index 000000000000..84ab8893aadd
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_register.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_REGISTER_H_
+#define _GVE_REGISTER_H_
+
+/* Fixed Configuration Registers */
+struct gve_registers {
+ __be32 device_status;
+ __be32 driver_status;
+ __be32 max_tx_queues;
+ __be32 max_rx_queues;
+ __be32 adminq_pfn;
+ __be32 adminq_doorbell;
+ __be32 adminq_event_counter;
+ u8 reserved[3];
+ u8 driver_version;
+};
+
+enum gve_device_status_flags {
+ GVE_DEVICE_STATUS_RESET_MASK = BIT(1),
+ GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2),
+};
+#endif /* _GVE_REGISTER_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
new file mode 100644
index 000000000000..c1aeabd1c594
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */ + +#include "gve.h" +#include "gve_adminq.h" +#include <linux/etherdevice.h> + +static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)]; + + block->rx = NULL; +} + +static void gve_rx_free_ring(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + struct device *dev = &priv->pdev->dev; + size_t bytes; + u32 slots; + + gve_rx_remove_from_block(priv, idx); + + bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; + dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); + rx->desc.desc_ring = NULL; + + dma_free_coherent(dev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; + + gve_unassign_qpl(priv, rx->data.qpl->id); + rx->data.qpl = NULL; + kfree(rx->data.page_info); + + slots = rx->data.mask + 1; + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(dev, bytes, rx->data.data_ring, + rx->data.data_bus); + rx->data.data_ring = NULL; + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); +} + +static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, + struct gve_rx_data_slot *slot, + dma_addr_t addr, struct page *page) +{ + page_info->page = page; + page_info->page_offset = 0; + page_info->page_address = page_address(page); + slot->qpl_offset = cpu_to_be64(addr); +} + +static int gve_prefill_rx_pages(struct gve_rx_ring *rx) +{ + struct gve_priv *priv = rx->gve; + u32 slots; + int i; + + /* Allocate one page per Rx queue slot. Each page is split into two + * packet buffers, when possible we "page flip" between the two. + */ + slots = rx->data.mask + 1; + + rx->data.page_info = kvzalloc(slots * + sizeof(*rx->data.page_info), GFP_KERNEL); + if (!rx->data.page_info) + return -ENOMEM; + + rx->data.qpl = gve_assign_rx_qpl(priv); + + for (i = 0; i < slots; i++) { + struct page *page = rx->data.qpl->pages[i]; + dma_addr_t addr = i * PAGE_SIZE; + + gve_setup_rx_buffer(&rx->data.page_info[i], + &rx->data.data_ring[i], addr, page); + } + + return slots; +} + +static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) +{ + u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_rx_ring *rx = &priv->rx[queue_idx]; + + block->rx = rx; + rx->ntfy_id = ntfy_idx; +} + +static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + struct device *hdev = &priv->pdev->dev; + u32 slots, npages; + int filled_pages; + size_t bytes; + int err; + + netif_dbg(priv, drv, priv->dev, "allocating rx ring\n"); + /* Make sure everything is zeroed to start with */ + memset(rx, 0, sizeof(*rx)); + + rx->gve = priv; + rx->q_num = idx; + + slots = priv->rx_pages_per_qpl; + rx->data.mask = slots - 1; + + /* alloc rx data ring */ + bytes = sizeof(*rx->data.data_ring) * slots; + rx->data.data_ring = dma_alloc_coherent(hdev, bytes, + &rx->data.data_bus, + GFP_KERNEL); + if (!rx->data.data_ring) + return -ENOMEM; + filled_pages = gve_prefill_rx_pages(rx); + if (filled_pages < 0) { + err = -ENOMEM; + goto abort_with_slots; + } + rx->desc.fill_cnt = filled_pages; + /* Ensure data ring slots (packet buffers) are visible. 
*/ + dma_wmb(); + + /* Alloc gve_queue_resources */ + rx->q_resources = + dma_alloc_coherent(hdev, + sizeof(*rx->q_resources), + &rx->q_resources_bus, + GFP_KERNEL); + if (!rx->q_resources) { + err = -ENOMEM; + goto abort_filled; + } + netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx, + (unsigned long)rx->data.data_bus); + + /* alloc rx desc ring */ + bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; + npages = bytes / PAGE_SIZE; + if (npages * PAGE_SIZE != bytes) { + err = -EIO; + goto abort_with_q_resources; + } + + rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus, + GFP_KERNEL); + if (!rx->desc.desc_ring) { + err = -ENOMEM; + goto abort_with_q_resources; + } + rx->desc.mask = slots - 1; + rx->desc.cnt = 0; + rx->desc.seqno = 1; + gve_rx_add_to_block(priv, idx); + + return 0; + +abort_with_q_resources: + dma_free_coherent(hdev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; +abort_filled: + kfree(rx->data.page_info); +abort_with_slots: + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus); + rx->data.data_ring = NULL; + + return err; +} + +int gve_rx_alloc_rings(struct gve_priv *priv) +{ + int err = 0; + int i; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + err = gve_rx_alloc_ring(priv, i); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to alloc rx ring=%d: err=%d\n", + i, err); + break; + } + } + /* Unallocate if there was an error */ + if (err) { + int j; + + for (j = 0; j < i; j++) + gve_rx_free_ring(priv, j); + } + return err; +} + +void gve_rx_free_rings(struct gve_priv *priv) +{ + int i; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) + gve_rx_free_ring(priv, i); +} + +void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx) +{ + u32 db_idx = be32_to_cpu(rx->q_resources->db_index); + + iowrite32be(rx->desc.fill_cnt, &priv->db_bar2[db_idx]); +} + +static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) +{ + if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP))) + return PKT_HASH_TYPE_L4; + if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) + return PKT_HASH_TYPE_L3; + return PKT_HASH_TYPE_L2; +} + +static struct sk_buff *gve_rx_copy(struct net_device *dev, + struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, + u16 len) +{ + struct sk_buff *skb = napi_alloc_skb(napi, len); + void *va = page_info->page_address + GVE_RX_PAD + + page_info->page_offset; + + if (unlikely(!skb)) + return NULL; + + __skb_put(skb, len); + + skb_copy_to_linear_data(skb, va, len); + + skb->protocol = eth_type_trans(skb, dev); + return skb; +} + +static struct sk_buff *gve_rx_add_frags(struct net_device *dev, + struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, + u16 len) +{ + struct sk_buff *skb = napi_get_frags(napi); + + if (unlikely(!skb)) + return NULL; + + skb_add_rx_frag(skb, 0, page_info->page, + page_info->page_offset + + GVE_RX_PAD, len, PAGE_SIZE / 2); + + return skb; +} + +static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, + struct gve_rx_data_slot *data_ring) +{ + u64 addr = be64_to_cpu(data_ring->qpl_offset); + + page_info->page_offset ^= PAGE_SIZE / 2; + addr ^= PAGE_SIZE / 2; + data_ring->qpl_offset = cpu_to_be64(addr); +} + +static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, + netdev_features_t feat) +{ + struct gve_rx_slot_page_info *page_info; + struct gve_priv *priv = rx->gve; + struct napi_struct *napi = 
&priv->ntfy_blocks[rx->ntfy_id].napi; + struct net_device *dev = priv->dev; + struct sk_buff *skb; + int pagecount; + u16 len; + u32 idx; + + /* drop this packet */ + if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) + return true; + + len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD; + idx = rx->data.cnt & rx->data.mask; + page_info = &rx->data.page_info[idx]; + + /* gvnic can only receive into registered segments. If the buffer + * can't be recycled, our only choice is to copy the data out of + * it so that we can return it to the device. + */ + + if (PAGE_SIZE == 4096) { + if (len <= priv->rx_copybreak) { + /* Just copy small packets */ + skb = gve_rx_copy(dev, napi, page_info, len); + goto have_skb; + } + if (unlikely(!gve_can_recycle_pages(dev))) { + skb = gve_rx_copy(dev, napi, page_info, len); + goto have_skb; + } + pagecount = page_count(page_info->page); + if (pagecount == 1) { + /* No part of this page is used by any SKBs; we attach + * the page fragment to a new SKB and pass it up the + * stack. + */ + skb = gve_rx_add_frags(dev, napi, page_info, len); + if (!skb) + return true; + /* Make sure the kernel stack can't release the page */ + get_page(page_info->page); + /* "flip" to other packet buffer on this page */ + gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]); + } else if (pagecount >= 2) { + /* We have previously passed the other half of this + * page up the stack, but it has not yet been freed. + */ + skb = gve_rx_copy(dev, napi, page_info, len); + } else { + WARN(pagecount < 1, "Pagecount should never be < 1"); + return false; + } + } else { + skb = gve_rx_copy(dev, napi, page_info, len); + } + +have_skb: + /* We didn't manage to allocate an skb but we haven't had any + * reset worthy failures. + */ + if (!skb) + return true; + + rx->data.cnt++; + + if (likely(feat & NETIF_F_RXCSUM)) { + /* NIC passes up the partial sum */ + if (rx_desc->csum) + skb->ip_summed = CHECKSUM_COMPLETE; + else + skb->ip_summed = CHECKSUM_NONE; + skb->csum = csum_unfold(rx_desc->csum); + } + + /* parse flags & pass relevant info up */ + if (likely(feat & NETIF_F_RXHASH) && + gve_needs_rss(rx_desc->flags_seq)) + skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash), + gve_rss_type(rx_desc->flags_seq)); + + if (skb_is_nonlinear(skb)) + napi_gro_frags(napi); + else + napi_gro_receive(napi, skb); + return true; +} + +static bool gve_rx_work_pending(struct gve_rx_ring *rx) +{ + struct gve_rx_desc *desc; + __be16 flags_seq; + u32 next_idx; + + next_idx = rx->desc.cnt & rx->desc.mask; + desc = rx->desc.desc_ring + next_idx; + + flags_seq = desc->flags_seq; + /* Make sure we have synchronized the seq no with the device */ + smp_rmb(); + + return (GVE_SEQNO(flags_seq) == rx->desc.seqno); +} + +bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + netdev_features_t feat) +{ + struct gve_priv *priv = rx->gve; + struct gve_rx_desc *desc; + u32 cnt = rx->desc.cnt; + u32 idx = cnt & rx->desc.mask; + u32 work_done = 0; + u64 bytes = 0; + + desc = rx->desc.desc_ring + idx; + while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) && + work_done < budget) { + netif_info(priv, rx_status, priv->dev, + "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n", + rx->q_num, idx, desc, desc->flags_seq); + netif_info(priv, rx_status, priv->dev, + "[%d] seqno=%d rx->desc.seqno=%d\n", + rx->q_num, GVE_SEQNO(desc->flags_seq), + rx->desc.seqno); + bytes += be16_to_cpu(desc->len) - GVE_RX_PAD; + if (!gve_rx(rx, desc, feat)) + gve_schedule_reset(priv); + cnt++; + idx = cnt & rx->desc.mask; + desc = rx->desc.desc_ring + idx; + 
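+ /* Editor's note: gve_next_seqno() never yields 0, so a zeroed
+ * descriptor that the device has not yet written back can never match
+ * the sequence number the driver expects next.
+ */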
rx->desc.seqno = gve_next_seqno(rx->desc.seqno); + work_done++; + } + + if (!work_done) + return false; + + u64_stats_update_begin(&rx->statss); + rx->rpackets += work_done; + rx->rbytes += bytes; + u64_stats_update_end(&rx->statss); + rx->desc.cnt = cnt; + rx->desc.fill_cnt += work_done; + + /* restock desc ring slots */ + dma_wmb(); /* Ensure descs are visible before ringing doorbell */ + gve_rx_write_doorbell(priv, rx); + return gve_rx_work_pending(rx); +} + +bool gve_rx_poll(struct gve_notify_block *block, int budget) +{ + struct gve_rx_ring *rx = block->rx; + netdev_features_t feat; + bool repoll = false; + + feat = block->napi.dev->features; + + /* If budget is 0, do all the work */ + if (budget == 0) + budget = INT_MAX; + + if (budget > 0) + repoll |= gve_clean_rx_done(rx, budget, feat); + else + repoll |= gve_rx_work_pending(rx); + return repoll; +} diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c new file mode 100644 index 000000000000..778b87b5a06c --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -0,0 +1,584 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. + */ + +#include "gve.h" +#include "gve_adminq.h" +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/vmalloc.h> +#include <linux/skbuff.h> + +static inline void gve_tx_put_doorbell(struct gve_priv *priv, + struct gve_queue_resources *q_resources, + u32 val) +{ + iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]); +} + +/* gvnic can only transmit from a Registered Segment. + * We copy skb payloads into the registered segment before writing Tx + * descriptors and ringing the Tx doorbell. + * + * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must + * free allocations in the order they were allocated. + */ + +static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo) +{ + fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP, + PAGE_KERNEL); + if (unlikely(!fifo->base)) { + netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n", + fifo->qpl->id); + return -ENOMEM; + } + + fifo->size = fifo->qpl->num_entries * PAGE_SIZE; + atomic_set(&fifo->available, fifo->size); + fifo->head = 0; + return 0; +} + +static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo) +{ + WARN(atomic_read(&fifo->available) != fifo->size, + "Releasing non-empty fifo"); + + vunmap(fifo->base); +} + +static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo, + size_t bytes) +{ + return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head; +} + +static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes) +{ + return (atomic_read(&fifo->available) <= bytes) ? false : true; +} + +/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO + * @fifo: FIFO to allocate from + * @bytes: Allocation size + * @iov: Scatter-gather elements to fill with allocation fragment base/len + * + * Returns number of valid elements in iov[] or negative on error. + * + * Allocations from a given FIFO must be externally synchronized but concurrent + * allocation and frees are allowed. 
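+ *
+ * Worked example (editor's sketch, assuming 64-byte L1 cachelines):
+ * with fifo->size = 8192 and fifo->head = 8000, a 400-byte request
+ * wraps and returns two fragments, iov[0] = {offset 8000, len 192}
+ * and iov[1] = {offset 0, len 208}; the head is then rounded up from
+ * 208 to 256 and the 48 padding bytes are charged to iov[1].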
+ */ +static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes, + struct gve_tx_iovec iov[2]) +{ + size_t overflow, padding; + u32 aligned_head; + int nfrags = 0; + + if (!bytes) + return 0; + + /* This check happens before we know how much padding is needed to + * align to a cacheline boundary for the payload, but that is fine, + * because the FIFO head always starts aligned, and the FIFO's boundaries + * are aligned, so if there is space for the data, there is space for + * the padding to the next alignment. + */ + WARN(!gve_tx_fifo_can_alloc(fifo, bytes), + "Reached %s when there's not enough space in the fifo", __func__); + + nfrags++; + + iov[0].iov_offset = fifo->head; + iov[0].iov_len = bytes; + fifo->head += bytes; + + if (fifo->head > fifo->size) { + /* If the allocation did not fit in the tail fragment of the + * FIFO, also use the head fragment. + */ + nfrags++; + overflow = fifo->head - fifo->size; + iov[0].iov_len -= overflow; + iov[1].iov_offset = 0; /* Start of fifo */ + iov[1].iov_len = overflow; + + fifo->head = overflow; + } + + /* Re-align to a cacheline boundary */ + aligned_head = L1_CACHE_ALIGN(fifo->head); + padding = aligned_head - fifo->head; + iov[nfrags - 1].iov_padding = padding; + atomic_sub(bytes + padding, &fifo->available); + fifo->head = aligned_head; + + if (fifo->head == fifo->size) + fifo->head = 0; + + return nfrags; +} + +/* gve_tx_free_fifo - Return space to Tx FIFO + * @fifo: FIFO to return fragments to + * @bytes: Bytes to free + */ +static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes) +{ + atomic_add(bytes, &fifo->available); +} + +static void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)]; + + block->tx = NULL; +} + +static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, + u32 to_do, bool try_to_wake); + +static void gve_tx_free_ring(struct gve_priv *priv, int idx) +{ + struct gve_tx_ring *tx = &priv->tx[idx]; + struct device *hdev = &priv->pdev->dev; + size_t bytes; + u32 slots; + + gve_tx_remove_from_block(priv, idx); + slots = tx->mask + 1; + gve_clean_tx_done(priv, tx, tx->req, false); + netdev_tx_reset_queue(tx->netdev_txq); + + dma_free_coherent(hdev, sizeof(*tx->q_resources), + tx->q_resources, tx->q_resources_bus); + tx->q_resources = NULL; + + gve_tx_fifo_release(priv, &tx->tx_fifo); + gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); + tx->tx_fifo.qpl = NULL; + + bytes = sizeof(*tx->desc) * slots; + dma_free_coherent(hdev, bytes, tx->desc, tx->bus); + tx->desc = NULL; + + vfree(tx->info); + tx->info = NULL; + + netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx); +} + +static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx) +{ + int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_tx_ring *tx = &priv->tx[queue_idx]; + + block->tx = tx; + tx->ntfy_id = ntfy_idx; +} + +static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) +{ + struct gve_tx_ring *tx = &priv->tx[idx]; + struct device *hdev = &priv->pdev->dev; + u32 slots = priv->tx_desc_cnt; + size_t bytes; + + /* Make sure everything is zeroed to start */ + memset(tx, 0, sizeof(*tx)); + tx->q_num = idx; + + tx->mask = slots - 1; + + /* alloc metadata */ + tx->info = vzalloc(sizeof(*tx->info) * slots); + if (!tx->info) + return -ENOMEM; + + /* alloc tx queue */ + bytes = sizeof(*tx->desc) * slots; + tx->desc = dma_alloc_coherent(hdev, 
bytes, &tx->bus, GFP_KERNEL); + if (!tx->desc) + goto abort_with_info; + + tx->tx_fifo.qpl = gve_assign_tx_qpl(priv); + + /* map Tx FIFO */ + if (gve_tx_fifo_init(priv, &tx->tx_fifo)) + goto abort_with_desc; + + tx->q_resources = + dma_alloc_coherent(hdev, + sizeof(*tx->q_resources), + &tx->q_resources_bus, + GFP_KERNEL); + if (!tx->q_resources) + goto abort_with_fifo; + + netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx, + (unsigned long)tx->bus); + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + gve_tx_add_to_block(priv, idx); + + return 0; + +abort_with_fifo: + gve_tx_fifo_release(priv, &tx->tx_fifo); +abort_with_desc: + dma_free_coherent(hdev, bytes, tx->desc, tx->bus); + tx->desc = NULL; +abort_with_info: + vfree(tx->info); + tx->info = NULL; + return -ENOMEM; +} + +int gve_tx_alloc_rings(struct gve_priv *priv) +{ + int err = 0; + int i; + + for (i = 0; i < priv->tx_cfg.num_queues; i++) { + err = gve_tx_alloc_ring(priv, i); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to alloc tx ring=%d: err=%d\n", + i, err); + break; + } + } + /* Unallocate if there was an error */ + if (err) { + int j; + + for (j = 0; j < i; j++) + gve_tx_free_ring(priv, j); + } + return err; +} + +void gve_tx_free_rings(struct gve_priv *priv) +{ + int i; + + for (i = 0; i < priv->tx_cfg.num_queues; i++) + gve_tx_free_ring(priv, i); +} + +/* gve_tx_avail - Calculates the number of slots available in the ring + * @tx: tx ring to check + * + * Returns the number of slots available + * + * The capacity of the queue is mask + 1. We don't need to reserve an entry. + **/ +static inline u32 gve_tx_avail(struct gve_tx_ring *tx) +{ + return tx->mask + 1 - (tx->req - tx->done); +} + +static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx, + struct sk_buff *skb) +{ + int pad_bytes, align_hdr_pad; + int bytes; + int hlen; + + hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + + tcp_hdrlen(skb) : skb_headlen(skb); + + pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, + hlen); + /* We need to take into account the header alignment padding. */ + align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen; + bytes = align_hdr_pad + pad_bytes + skb->len; + + return bytes; +} + +/* The most descriptors we could need are 3 - 1 for the headers, 1 for + * the beginning of the payload at the end of the FIFO, and 1 if the + * payload wraps to the beginning of the FIFO. + */ +#define MAX_TX_DESC_NEEDED 3 + +/* Check if sufficient resources (descriptor ring space, FIFO space) are + * available to transmit the given number of bytes. + */ +static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required) +{ + return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && + gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required)); +} + +/* Stops the queue if the skb cannot be transmitted. */ +static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb) +{ + int bytes_required; + + bytes_required = gve_skb_fifo_bytes_required(tx, skb); + if (likely(gve_can_tx(tx, bytes_required))) + return 0; + + /* No space, so stop the queue */ + tx->stop_queue++; + netif_tx_stop_queue(tx->netdev_txq); + smp_mb(); /* sync with restarting queue in gve_clean_tx_done() */ + + /* Now check for resources again, in case gve_clean_tx_done() freed + * resources after we checked and we stopped the queue after + * gve_clean_tx_done() checked. 
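+	 * The smp_mb() above pairs with the netif_tx_queue_stopped() check
+	 * in gve_clean_tx_done(): either this path sees the space freed by
+	 * the cleanup, or the cleanup sees the stopped queue and wakes it.
+	 * The interleaving being guarded against: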
+ * + * gve_maybe_stop_tx() gve_clean_tx_done() + * nsegs/can_alloc test failed + * gve_tx_free_fifo() + * if (tx queue stopped) + * netif_tx_queue_wake() + * netif_tx_stop_queue() + * Need to check again for space here! + */ + if (likely(!gve_can_tx(tx, bytes_required))) + return -EBUSY; + + netif_tx_start_queue(tx->netdev_txq); + tx->wake_queue++; + return 0; +} + +static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, + struct sk_buff *skb, bool is_gso, + int l4_hdr_offset, u32 desc_cnt, + u16 hlen, u64 addr) +{ + /* l4_hdr_offset and csum_offset are in units of 16-bit words */ + if (is_gso) { + pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM; + pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM; + pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; + } else { + pkt_desc->pkt.type_flags = GVE_TXD_STD; + pkt_desc->pkt.l4_csum_offset = 0; + pkt_desc->pkt.l4_hdr_offset = 0; + } + pkt_desc->pkt.desc_cnt = desc_cnt; + pkt_desc->pkt.len = cpu_to_be16(skb->len); + pkt_desc->pkt.seg_len = cpu_to_be16(hlen); + pkt_desc->pkt.seg_addr = cpu_to_be64(addr); +} + +static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc, + struct sk_buff *skb, bool is_gso, + u16 len, u64 addr) +{ + seg_desc->seg.type_flags = GVE_TXD_SEG; + if (is_gso) { + if (skb_is_gso_v6(skb)) + seg_desc->seg.type_flags |= GVE_TXSF_IPV6; + seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1; + seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + } + seg_desc->seg.seg_len = cpu_to_be16(len); + seg_desc->seg.seg_addr = cpu_to_be64(addr); +} + +static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb) +{ + int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset; + union gve_tx_desc *pkt_desc, *seg_desc; + struct gve_tx_buffer_state *info; + bool is_gso = skb_is_gso(skb); + u32 idx = tx->req & tx->mask; + int payload_iov = 2; + int copy_offset; + u32 next_idx; + int i; + + info = &tx->info[idx]; + pkt_desc = &tx->desc[idx]; + + l4_hdr_offset = skb_checksum_start_offset(skb); + /* If the skb is gso, then we want the tcp header in the first segment + * otherwise we want the linear portion of the skb (which will contain + * the checksum because skb->csum_start and skb->csum_offset are given + * relative to skb->head) in the first segment. + */ + hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : + skb_headlen(skb); + + info->skb = skb; + /* We don't want to split the header, so if necessary, pad to the end + * of the fifo and then put the header at the beginning of the fifo. 
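+	 * Example (editor's sketch): with fifo->size = 8192, fifo->head =
+	 * 8150 and hlen = 100, gve_tx_fifo_pad_alloc_one_frag() returns 42
+	 * pad bytes, so the allocation below lands the whole header at
+	 * offset 0 instead of splitting it across the wrap.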
+ */ + pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen); + hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes, + &info->iov[0]); + WARN(!hdr_nfrags, "hdr_nfrags should never be 0!"); + payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen, + &info->iov[payload_iov]); + + gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, + 1 + payload_nfrags, hlen, + info->iov[hdr_nfrags - 1].iov_offset); + + skb_copy_bits(skb, 0, + tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset, + hlen); + copy_offset = hlen; + + for (i = payload_iov; i < payload_nfrags + payload_iov; i++) { + next_idx = (tx->req + 1 + i - payload_iov) & tx->mask; + seg_desc = &tx->desc[next_idx]; + + gve_tx_fill_seg_desc(seg_desc, skb, is_gso, + info->iov[i].iov_len, + info->iov[i].iov_offset); + + skb_copy_bits(skb, copy_offset, + tx->tx_fifo.base + info->iov[i].iov_offset, + info->iov[i].iov_len); + copy_offset += info->iov[i].iov_len; + } + + return 1 + payload_nfrags; +} + +netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx; + int nsegs; + + WARN(skb_get_queue_mapping(skb) > priv->tx_cfg.num_queues, + "skb queue index out of range"); + tx = &priv->tx[skb_get_queue_mapping(skb)]; + if (unlikely(gve_maybe_stop_tx(tx, skb))) { + /* We need to ring the txq doorbell -- we have stopped the Tx + * queue for want of resources, but prior calls to gve_tx() + * may have added descriptors without ringing the doorbell. + */ + + /* Ensure tx descs from a prior gve_tx are visible before + * ringing doorbell. + */ + dma_wmb(); + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); + return NETDEV_TX_BUSY; + } + nsegs = gve_tx_add_skb(tx, skb); + + netdev_tx_sent_queue(tx->netdev_txq, skb->len); + skb_tx_timestamp(skb); + + /* give packets to NIC */ + tx->req += nsegs; + + if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more()) + return NETDEV_TX_OK; + + /* Ensure tx descs are visible before ringing doorbell */ + dma_wmb(); + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); + return NETDEV_TX_OK; +} + +#define GVE_TX_START_THRESH PAGE_SIZE + +static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, + u32 to_do, bool try_to_wake) +{ + struct gve_tx_buffer_state *info; + u64 pkts = 0, bytes = 0; + size_t space_freed = 0; + struct sk_buff *skb; + int i, j; + u32 idx; + + for (j = 0; j < to_do; j++) { + idx = tx->done & tx->mask; + netif_info(priv, tx_done, priv->dev, + "[%d] %s: idx=%d (req=%u done=%u)\n", + tx->q_num, __func__, idx, tx->req, tx->done); + info = &tx->info[idx]; + skb = info->skb; + + /* Mark as free */ + if (skb) { + info->skb = NULL; + bytes += skb->len; + pkts++; + dev_consume_skb_any(skb); + /* FIFO free */ + for (i = 0; i < ARRAY_SIZE(info->iov); i++) { + space_freed += info->iov[i].iov_len + + info->iov[i].iov_padding; + info->iov[i].iov_len = 0; + info->iov[i].iov_padding = 0; + } + } + tx->done++; + } + + gve_tx_free_fifo(&tx->tx_fifo, space_freed); + u64_stats_update_begin(&tx->statss); + tx->bytes_done += bytes; + tx->pkt_done += pkts; + u64_stats_update_end(&tx->statss); + netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes); + + /* start the queue if we've stopped it */ +#ifndef CONFIG_BQL + /* Make sure that the doorbells are synced */ + smp_mb(); +#endif + if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) && + likely(gve_can_tx(tx, GVE_TX_START_THRESH))) { + tx->wake_queue++; + netif_tx_wake_queue(tx->netdev_txq); + } + + return pkts; +} + +__be32 
gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx) +{ + u32 counter_index = be32_to_cpu((tx->q_resources->counter_index)); + + return READ_ONCE(priv->counter_array[counter_index]); +} + +bool gve_tx_poll(struct gve_notify_block *block, int budget) +{ + struct gve_priv *priv = block->priv; + struct gve_tx_ring *tx = block->tx; + bool repoll = false; + u32 nic_done; + u32 to_do; + + /* If budget is 0, do all the work */ + if (budget == 0) + budget = INT_MAX; + + /* Find out how much work there is to be done */ + tx->last_nic_done = gve_tx_load_event_counter(priv, tx); + nic_done = be32_to_cpu(tx->last_nic_done); + if (budget > 0) { + /* Do as much work as we have that the budget will + * allow + */ + to_do = min_t(u32, (nic_done - tx->done), budget); + gve_clean_tx_done(priv, tx, to_do, true); + } + /* If we still have work we want to repoll */ + repoll |= (nic_done != tx->done); + return repoll; +} diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index a0d780c14e60..3892a2062404 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -46,6 +46,16 @@ config HIP04_ETH If you wish to compile a kernel for a hardware with hisilicon p04 SoC and want to use the internal ethernet then you should answer Y to this. +config HI13X1_GMAC + bool "Hisilicon HI13X1 Network Device Support" + depends on HIP04_ETH + help + If you wish to compile a kernel for a hardware with hisilicon hi13x1_gmac + then you should answer Y to this. This makes this driver suitable for use + on certain boards such as the HI13X1. + + If you are unsure, say N. + config HNS_MDIO tristate select PHYLIB diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index e1f2978506fd..625635771b83 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -16,6 +16,8 @@ #include <linux/mfd/syscon.h> #include <linux/regmap.h> +#define SC_PPE_RESET_DREQ 0x026C + #define PPE_CFG_RX_ADDR 0x100 #define PPE_CFG_POOL_GRP 0x300 #define PPE_CFG_RX_BUF_SIZE 0x400 @@ -33,10 +35,23 @@ #define GE_MODE_CHANGE_REG 0x1b4 #define GE_RECV_CONTROL_REG 0x1e0 #define GE_STATION_MAC_ADDRESS 0x210 -#define PPE_CFG_CPU_ADD_ADDR 0x580 -#define PPE_CFG_MAX_FRAME_LEN_REG 0x408 + #define PPE_CFG_BUS_CTRL_REG 0x424 #define PPE_CFG_RX_CTRL_REG 0x428 + +#if defined(CONFIG_HI13X1_GMAC) +#define PPE_CFG_CPU_ADD_ADDR 0x6D0 +#define PPE_CFG_MAX_FRAME_LEN_REG 0x500 +#define PPE_CFG_RX_PKT_MODE_REG 0x504 +#define PPE_CFG_QOS_VMID_GEN 0x520 +#define PPE_CFG_RX_PKT_INT 0x740 +#define PPE_INTEN 0x700 +#define PPE_INTSTS 0x708 +#define PPE_RINT 0x704 +#define PPE_CFG_STS_MODE 0x880 +#else +#define PPE_CFG_CPU_ADD_ADDR 0x580 +#define PPE_CFG_MAX_FRAME_LEN_REG 0x408 #define PPE_CFG_RX_PKT_MODE_REG 0x438 #define PPE_CFG_QOS_VMID_GEN 0x500 #define PPE_CFG_RX_PKT_INT 0x538 @@ -44,8 +59,12 @@ #define PPE_INTSTS 0x608 #define PPE_RINT 0x604 #define PPE_CFG_STS_MODE 0x700 +#endif /* CONFIG_HI13X1_GMAC */ + #define PPE_HIS_RX_PKT_CNT 0x804 +#define RESET_DREQ_ALL 0xffffffff + /* REG_INTERRUPT */ #define RCV_INT BIT(10) #define RCV_NOBUF BIT(8) @@ -57,8 +76,15 @@ /* TX descriptor config */ #define TX_FREE_MEM BIT(0) #define TX_READ_ALLOC_L3 BIT(1) -#define TX_FINISH_CACHE_INV BIT(2) +#if defined(CONFIG_HI13X1_GMAC) +#define TX_CLEAR_WB BIT(7) +#define TX_RELEASE_TO_PPE BIT(4) +#define TX_FINISH_CACHE_INV BIT(6) +#define TX_POOL_SHIFT 16 +#else #define TX_CLEAR_WB BIT(4) +#define 
TX_FINISH_CACHE_INV BIT(2) +#endif #define TX_L3_CHECKSUM BIT(5) #define TX_LOOP_BACK BIT(11) @@ -93,18 +119,35 @@ #define GE_RX_PORT_EN BIT(1) #define GE_TX_PORT_EN BIT(2) -#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(12) - #define PPE_CFG_RX_PKT_ALIGN BIT(18) -#define PPE_CFG_QOS_VMID_MODE BIT(14) + +#if defined(CONFIG_HI13X1_GMAC) +#define PPE_CFG_QOS_VMID_GRP_SHIFT 4 +#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 7 +#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(0) +#define PPE_CFG_QOS_VMID_MODE BIT(15) +#define PPE_CFG_BUS_LOCAL_REL (BIT(9) | BIT(15) | BIT(19) | BIT(23)) + +/* buf unit size is cache_line_size, which is 64, so the shift is 6 */ +#define PPE_BUF_SIZE_SHIFT 6 +#define PPE_TX_BUF_HOLD BIT(31) +#define CACHE_LINE_MASK 0x3F +#else #define PPE_CFG_QOS_VMID_GRP_SHIFT 8 +#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 +#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(12) +#define PPE_CFG_QOS_VMID_MODE BIT(14) +#define PPE_CFG_BUS_LOCAL_REL BIT(14) + +/* buf unit size is 1, so the shift is 0 */ +#define PPE_BUF_SIZE_SHIFT 0 +#define PPE_TX_BUF_HOLD 0 +#endif /* CONFIG_HI13X1_GMAC */ #define PPE_CFG_RX_FIFO_FSFU BIT(11) #define PPE_CFG_RX_DEPTH_SHIFT 16 #define PPE_CFG_RX_START_SHIFT 0 -#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 -#define PPE_CFG_BUS_LOCAL_REL BIT(14) #define PPE_CFG_BUS_BIG_ENDIEN BIT(0) #define RX_DESC_NUM 128 @@ -128,26 +171,50 @@ #define HIP04_MIN_TX_COALESCE_FRAMES 100 struct tx_desc { +#if defined(CONFIG_HI13X1_GMAC) + u32 reserved1[2]; + u32 send_addr; + u16 send_size; + u16 data_offset; + u32 reserved2[7]; + u32 cfg; + u32 wb_addr; + u32 reserved3[3]; +#else u32 send_addr; u32 send_size; u32 next_addr; u32 cfg; u32 wb_addr; +#endif } __aligned(64); struct rx_desc { +#if defined(CONFIG_HI13X1_GMAC) + u32 reserved1[3]; + u16 pkt_len; + u16 reserved_16; + u32 reserved2[6]; + u32 pkt_err; + u32 reserved3[5]; +#else u16 reserved_16; u16 pkt_len; u32 reserve1[3]; u32 pkt_err; u32 reserve2[4]; +#endif }; struct hip04_priv { void __iomem *base; +#if defined(CONFIG_HI13X1_GMAC) + void __iomem *sysctrl_base; +#endif int phy_mode; int chan; unsigned int port; + unsigned int group; unsigned int speed; unsigned int duplex; unsigned int reg_inten; @@ -221,6 +288,13 @@ static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex) writel_relaxed(val, priv->base + GE_MODE_CHANGE_REG); } +static void hip04_reset_dreq(struct hip04_priv *priv) +{ +#if defined(CONFIG_HI13X1_GMAC) + writel_relaxed(RESET_DREQ_ALL, priv->sysctrl_base + SC_PPE_RESET_DREQ); +#endif +} + static void hip04_reset_ppe(struct hip04_priv *priv) { u32 val, tmp, timeout = 0; @@ -241,14 +315,14 @@ static void hip04_config_fifo(struct hip04_priv *priv) val |= PPE_CFG_STS_RX_PKT_CNT_RC; writel_relaxed(val, priv->base + PPE_CFG_STS_MODE); - val = BIT(priv->port); + val = BIT(priv->group); regmap_write(priv->map, priv->port * 4 + PPE_CFG_POOL_GRP, val); - val = priv->port << PPE_CFG_QOS_VMID_GRP_SHIFT; + val = priv->group << PPE_CFG_QOS_VMID_GRP_SHIFT; val |= PPE_CFG_QOS_VMID_MODE; writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN); - val = RX_BUF_SIZE; + val = RX_BUF_SIZE >> PPE_BUF_SIZE_SHIFT; regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_BUF_SIZE, val); val = RX_DESC_NUM << PPE_CFG_RX_DEPTH_SHIFT; @@ -285,8 +359,10 @@ static void hip04_config_fifo(struct hip04_priv *priv) val |= GE_RX_STRIP_PAD | GE_RX_PAD_EN; writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG); +#ifndef CONFIG_HI13X1_GMAC val = GE_AUTO_NEG_CTL; writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG); +#endif } static void hip04_mac_enable(struct net_device 
*ndev) @@ -329,12 +405,18 @@ static void hip04_mac_disable(struct net_device *ndev) static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys) { - writel(phys, priv->base + PPE_CFG_CPU_ADD_ADDR); + u32 val; + + val = phys >> PPE_BUF_SIZE_SHIFT | PPE_TX_BUF_HOLD; + writel(val, priv->base + PPE_CFG_CPU_ADD_ADDR); } static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys) { - regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, phys); + u32 val; + + val = phys >> PPE_BUF_SIZE_SHIFT; + regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, val); } static u32 hip04_recv_cnt(struct hip04_priv *priv) @@ -442,11 +524,20 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) priv->tx_skb[tx_head] = skb; priv->tx_phys[tx_head] = phys; - desc->send_addr = cpu_to_be32(phys); - desc->send_size = cpu_to_be32(skb->len); - desc->cfg = cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); + + desc->send_size = (__force u32)cpu_to_be32(skb->len); +#if defined(CONFIG_HI13X1_GMAC) + desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV + | TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT); + desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK); + desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK); +#else + desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); + desc->send_addr = (__force u32)cpu_to_be32(phys); +#endif phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc); - desc->wb_addr = cpu_to_be32(phys); + desc->wb_addr = (__force u32)cpu_to_be32(phys + + offsetof(struct tx_desc, send_addr)); skb_tx_timestamp(skb); hip04_set_xmit_desc(priv, phys); @@ -507,8 +598,8 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) priv->rx_phys[priv->rx_head] = 0; desc = (struct rx_desc *)skb->data; - len = be16_to_cpu(desc->pkt_len); - err = be32_to_cpu(desc->pkt_err); + len = be16_to_cpu((__force __be16)desc->pkt_len); + err = be32_to_cpu((__force __be32)desc->pkt_err); if (0 == len) { dev_kfree_skb_any(skb); @@ -828,7 +919,16 @@ static int hip04_mac_probe(struct platform_device *pdev) goto init_fail; } - ret = of_parse_phandle_with_fixed_args(node, "port-handle", 2, 0, &arg); +#if defined(CONFIG_HI13X1_GMAC) + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + priv->sysctrl_base = devm_ioremap_resource(d, res); + if (IS_ERR(priv->sysctrl_base)) { + ret = PTR_ERR(priv->sysctrl_base); + goto init_fail; + } +#endif + + ret = of_parse_phandle_with_fixed_args(node, "port-handle", 3, 0, &arg); if (ret < 0) { dev_warn(d, "no port-handle\n"); goto init_fail; @@ -836,6 +936,7 @@ static int hip04_mac_probe(struct platform_device *pdev) priv->port = arg.args[0]; priv->chan = arg.args[1] * RX_DESC_NUM; + priv->group = arg.args[2]; hrtimer_init(&priv->tx_coalesce_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -896,6 +997,7 @@ static int hip04_mac_probe(struct platform_device *pdev) ndev->irq = irq; netif_napi_add(ndev, &priv->napi, hip04_rx_poll, NAPI_POLL_WEIGHT); + hip04_reset_dreq(priv); hip04_reset_ppe(priv); if (priv->phy_mode == PHY_INTERFACE_MODE_MII) hip04_config_port(ndev, SPEED_100, DUPLEX_FULL); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index fe879c07ae3c..2235dd55fab2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2370,6 +2370,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 
NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6; + ndev->vlan_features |= NETIF_F_TSO | NETIF_F_TSO6; ndev->max_mtu = MAC_MAX_MTU_V2 - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); break; diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 83e19c6b974e..8ad5292eebbe 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -69,7 +69,7 @@ enum hclge_mbx_vlan_cfg_subcode { }; #define HCLGE_MBX_MAX_MSG_SIZE 16 -#define HCLGE_MBX_MAX_RESP_DATA_SIZE 16 +#define HCLGE_MBX_MAX_RESP_DATA_SIZE 8 #define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM 3 #define HCLGE_MBX_RING_NODE_VARIABLE_NUM 3 diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index fa8b8506b120..908d4f45c06a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -16,21 +16,18 @@ static LIST_HEAD(hnae3_ae_dev_list); */ static DEFINE_MUTEX(hnae3_common_lock); -static bool hnae3_client_match(enum hnae3_client_type client_type, - enum hnae3_dev_type dev_type) +static bool hnae3_client_match(enum hnae3_client_type client_type) { - if ((dev_type == HNAE3_DEV_KNIC) && (client_type == HNAE3_CLIENT_KNIC || - client_type == HNAE3_CLIENT_ROCE)) - return true; - - if (dev_type == HNAE3_DEV_UNIC && client_type == HNAE3_CLIENT_UNIC) + if (client_type == HNAE3_CLIENT_KNIC || + client_type == HNAE3_CLIENT_ROCE) return true; return false; } void hnae3_set_client_init_flag(struct hnae3_client *client, - struct hnae3_ae_dev *ae_dev, int inited) + struct hnae3_ae_dev *ae_dev, + unsigned int inited) { if (!client || !ae_dev) return; @@ -39,9 +36,6 @@ void hnae3_set_client_init_flag(struct hnae3_client *client, case HNAE3_CLIENT_KNIC: hnae3_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited); break; - case HNAE3_CLIENT_UNIC: - hnae3_set_bit(ae_dev->flag, HNAE3_UNIC_CLIENT_INITED_B, inited); - break; case HNAE3_CLIENT_ROCE: hnae3_set_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B, inited); break; @@ -61,10 +55,6 @@ static int hnae3_get_client_init_flag(struct hnae3_client *client, inited = hnae3_get_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B); break; - case HNAE3_CLIENT_UNIC: - inited = hnae3_get_bit(ae_dev->flag, - HNAE3_UNIC_CLIENT_INITED_B); - break; case HNAE3_CLIENT_ROCE: inited = hnae3_get_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B); @@ -82,7 +72,7 @@ static int hnae3_init_client_instance(struct hnae3_client *client, int ret; /* check if this client matches the type of ae_dev */ - if (!(hnae3_client_match(client->type, ae_dev->dev_type) && + if (!(hnae3_client_match(client->type) && hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) { return 0; } @@ -99,7 +89,7 @@ static void hnae3_uninit_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { /* check if this client matches the type of ae_dev */ - if (!(hnae3_client_match(client->type, ae_dev->dev_type) && + if (!(hnae3_client_match(client->type) && hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) return; @@ -251,6 +241,7 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo) ae_algo->ops->uninit_ae_dev(ae_dev); hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0); + ae_dev->ops = NULL; } list_del(&ae_algo->node); @@ -351,6 +342,7 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev) ae_algo->ops->uninit_ae_dev(ae_dev); hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0); + ae_dev->ops = NULL; } 
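+ /* Editor's note: ae_dev->ops was just cleared so that the PCI error
+ * handlers in hns3_enet.c can use a simple NULL check to tell a
+ * torn-down ae_dev from a live one.
+ */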
list_del(&ae_dev->node); diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index ad21b0ef1946..48c7b70fc2c4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -102,15 +102,9 @@ enum hnae3_loop { enum hnae3_client_type { HNAE3_CLIENT_KNIC, - HNAE3_CLIENT_UNIC, HNAE3_CLIENT_ROCE, }; -enum hnae3_dev_type { - HNAE3_DEV_KNIC, - HNAE3_DEV_UNIC, -}; - /* mac media type */ enum hnae3_media_type { HNAE3_MEDIA_TYPE_UNKNOWN, @@ -154,7 +148,6 @@ enum hnae3_reset_type { HNAE3_VF_FULL_RESET, HNAE3_FLR_RESET, HNAE3_FUNC_RESET, - HNAE3_CORE_RESET, HNAE3_GLOBAL_RESET, HNAE3_IMP_RESET, HNAE3_UNKNOWN_RESET, @@ -220,8 +213,7 @@ struct hnae3_ae_dev { const struct hnae3_ae_ops *ops; struct list_head node; u32 flag; - u8 override_pci_need_reset; /* fix to stop multiple reset happening */ - enum hnae3_dev_type dev_type; + unsigned long hw_err_reset_req; enum hnae3_reset_type reset_type; void *priv; }; @@ -271,6 +263,8 @@ struct hnae3_ae_dev { * get auto autonegotiation of pause frame use * restart_autoneg() * restart autonegotiation + * halt_autoneg() + * halt/resume autonegotiation when autonegotiation on * get_coalesce_usecs() * get usecs to delay a TX interrupt after a packet is sent * get_rx_max_coalesced_frames() @@ -339,10 +333,14 @@ struct hnae3_ae_dev { * Set vlan filter config of Ports * set_vf_vlan_filter() * Set vlan filter config of vf + * restore_vlan_table() + * Restore vlan filter entries after reset * enable_hw_strip_rxvtag() * Enable/disable hardware strip vlan tag of packets received * set_gro_en * Enable/disable HW GRO + * add_arfs_entry + * Check the 5-tuples of flow, and create flow director rule */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -386,6 +384,7 @@ struct hnae3_ae_ops { int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); int (*restart_autoneg)(struct hnae3_handle *handle); + int (*halt_autoneg)(struct hnae3_handle *handle, bool halt); void (*get_coalesce_usecs)(struct hnae3_handle *handle, u32 *tx_usecs, u32 *rx_usecs); @@ -463,6 +462,8 @@ struct hnae3_ae_ops { u16 vlan, u8 qos, __be16 proto); int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable); void (*reset_event)(struct pci_dev *pdev, struct hnae3_handle *handle); + enum hnae3_reset_type (*get_reset_level)(struct hnae3_ae_dev *ae_dev, + unsigned long *addr); void (*set_default_reset_request)(struct hnae3_ae_dev *ae_dev, enum hnae3_reset_type rst_type); void (*get_channels)(struct hnae3_handle *handle, @@ -492,7 +493,9 @@ struct hnae3_ae_ops { struct ethtool_rxnfc *cmd, u32 *rule_locs); int (*restore_fd_rules)(struct hnae3_handle *handle); void (*enable_fd)(struct hnae3_handle *handle, bool enable); - int (*dbg_run_cmd)(struct hnae3_handle *handle, char *cmd_buf); + int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id, + u16 flow_id, struct flow_keys *fkeys); + int (*dbg_run_cmd)(struct hnae3_handle *handle, const char *cmd_buf); pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev); bool (*get_hw_reset_stat)(struct hnae3_handle *handle); bool (*ae_dev_resetting)(struct hnae3_handle *handle); @@ -502,6 +505,7 @@ struct hnae3_ae_ops { void (*set_timer_task)(struct hnae3_handle *handle, bool enable); int (*mac_connect_phy)(struct hnae3_handle *handle); void (*mac_disconnect_phy)(struct hnae3_handle *handle); + void (*restore_vlan_table)(struct hnae3_handle *handle); }; struct hnae3_dcb_ops 
{ @@ -643,5 +647,6 @@ void hnae3_unregister_client(struct hnae3_client *client); int hnae3_register_client(struct hnae3_client *client); void hnae3_set_client_init_flag(struct hnae3_client *client, - struct hnae3_ae_dev *ae_dev, int inited); + struct hnae3_ae_dev *ae_dev, + unsigned int inited); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c index b6fabbbdfd5b..d2ec4c573bf8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c @@ -4,8 +4,7 @@ #include "hnae3.h" #include "hns3_enet.h" -static -int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) +static int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) { struct hnae3_handle *h = hns3_get_handle(ndev); @@ -18,8 +17,7 @@ int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) return -EOPNOTSUPP; } -static -int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) +static int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) { struct hnae3_handle *h = hns3_get_handle(ndev); @@ -32,8 +30,7 @@ int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) return -EOPNOTSUPP; } -static -int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) +static int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) { struct hnae3_handle *h = hns3_get_handle(ndev); @@ -46,8 +43,7 @@ int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) return -EOPNOTSUPP; } -static -int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) +static int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) { struct hnae3_handle *h = hns3_get_handle(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index fc4917ac44be..a4b937286f55 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -11,7 +11,8 @@ static struct dentry *hns3_dbgfs_root; -static int hns3_dbg_queue_info(struct hnae3_handle *h, char *cmd_buf) +static int hns3_dbg_queue_info(struct hnae3_handle *h, + const char *cmd_buf) { struct hns3_nic_priv *priv = h->priv; struct hns3_nic_ring_data *ring_data; @@ -155,7 +156,7 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h) return 0; } -static int hns3_dbg_bd_info(struct hnae3_handle *h, char *cmd_buf) +static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf) { struct hns3_nic_priv *priv = h->priv; struct hns3_nic_ring_data *ring_data; @@ -252,6 +253,7 @@ static void hns3_dbg_help(struct hnae3_handle *h) dev_info(&h->pdev->dev, "dump qos buf cfg\n"); dev_info(&h->pdev->dev, "dump mng tbl\n"); dev_info(&h->pdev->dev, "dump reset info\n"); + dev_info(&h->pdev->dev, "dump m7 info\n"); dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n"); dev_info(&h->pdev->dev, "dump mac tnl status\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index f326805543a4..310afa708831 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4,6 +4,9 @@ #include <linux/dma-mapping.h> #include <linux/etherdevice.h> #include <linux/interrupt.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif #include <linux/if_vlan.h> #include <linux/ip.h> #include 
<linux/ipv6.h> @@ -14,6 +17,7 @@ #include <linux/sctp.h> #include <linux/vermagic.h> #include <net/gre.h> +#include <net/ip6_checksum.h> #include <net/pkt_cls.h> #include <net/tcp.h> #include <net/vxlan.h> @@ -24,8 +28,7 @@ #define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift))) #define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE) -static void hns3_clear_all_ring(struct hnae3_handle *h); -static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h); +static void hns3_clear_all_ring(struct hnae3_handle *h, bool force); static void hns3_remove_hw_addr(struct net_device *netdev); static const char hns3_driver_name[] = "hns3"; @@ -79,23 +82,6 @@ static irqreturn_t hns3_irq_handle(int irq, void *vector) return IRQ_HANDLED; } -/* This callback function is used to set affinity changes to the irq affinity - * masks when the irq_set_affinity_notifier function is used. - */ -static void hns3_nic_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct hns3_enet_tqp_vector *tqp_vectors = - container_of(notify, struct hns3_enet_tqp_vector, - affinity_notify); - - tqp_vectors->affinity_mask = *mask; -} - -static void hns3_nic_irq_affinity_release(struct kref *ref) -{ -} - static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv) { struct hns3_enet_tqp_vector *tqp_vectors; @@ -107,8 +93,7 @@ static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv) if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED) continue; - /* clear the affinity notifier and affinity mask */ - irq_set_affinity_notifier(tqp_vectors->vector_irq, NULL); + /* clear the affinity mask */ irq_set_affinity_hint(tqp_vectors->vector_irq, NULL); /* release the irq resource */ @@ -153,20 +138,14 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv) tqp_vectors->name[HNAE3_INT_NAME_LEN - 1] = '\0'; ret = request_irq(tqp_vectors->vector_irq, hns3_irq_handle, 0, - tqp_vectors->name, - tqp_vectors); + tqp_vectors->name, tqp_vectors); if (ret) { netdev_err(priv->netdev, "request irq(%d) fail\n", tqp_vectors->vector_irq); + hns3_nic_uninit_irq(priv); return ret; } - tqp_vectors->affinity_notify.notify = - hns3_nic_irq_affinity_notify; - tqp_vectors->affinity_notify.release = - hns3_nic_irq_affinity_release; - irq_set_affinity_notifier(tqp_vectors->vector_irq, - &tqp_vectors->affinity_notify); irq_set_affinity_hint(tqp_vectors->vector_irq, &tqp_vectors->affinity_mask); @@ -297,8 +276,7 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev) ret = netif_set_real_num_tx_queues(netdev, queue_size); if (ret) { netdev_err(netdev, - "netif_set_real_num_tx_queues fail, ret=%d!\n", - ret); + "netif_set_real_num_tx_queues fail, ret=%d!\n", ret); return ret; } @@ -340,6 +318,40 @@ static void hns3_tqp_disable(struct hnae3_queue *tqp) hns3_write_dev(tqp, HNS3_RING_EN_REG, rcb_reg); } +static void hns3_free_rx_cpu_rmap(struct net_device *netdev) +{ +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(netdev->rx_cpu_rmap); + netdev->rx_cpu_rmap = NULL; +#endif +} + +static int hns3_set_rx_cpu_rmap(struct net_device *netdev) +{ +#ifdef CONFIG_RFS_ACCEL + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hns3_enet_tqp_vector *tqp_vector; + int i, ret; + + if (!netdev->rx_cpu_rmap) { + netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->vector_num); + if (!netdev->rx_cpu_rmap) + return -ENOMEM; + } + + for (i = 0; i < priv->vector_num; i++) { + tqp_vector = &priv->tqp_vector[i]; + ret = irq_cpu_rmap_add(netdev->rx_cpu_rmap, + tqp_vector->vector_irq); + if (ret) { + 
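+ /* aRFS is optional: free the whole rmap on a partial failure
+ * and let hns3_nic_net_up() just warn and carry on.
+ */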
hns3_free_rx_cpu_rmap(netdev); + return ret; + } + } +#endif + return 0; +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -351,11 +363,16 @@ static int hns3_nic_net_up(struct net_device *netdev) if (ret) return ret; + /* the device can work without cpu rmap, only aRFS needs it */ + ret = hns3_set_rx_cpu_rmap(netdev); + if (ret) + netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret); + /* get irq resource for all vectors */ ret = hns3_nic_init_irq(priv); if (ret) { - netdev_err(netdev, "hns init irq failed! ret=%d\n", ret); - return ret; + netdev_err(netdev, "init irq failed! ret=%d\n", ret); + goto free_rmap; } clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); @@ -384,7 +401,8 @@ out_start_err: hns3_vector_disable(&priv->tqp_vector[j]); hns3_nic_uninit_irq(priv); - +free_rmap: + hns3_free_rx_cpu_rmap(netdev); return ret; } @@ -429,16 +447,13 @@ static int hns3_nic_net_open(struct net_device *netdev) ret = hns3_nic_net_up(netdev); if (ret) { - netdev_err(netdev, - "hns net up fail, ret=%d!\n", ret); + netdev_err(netdev, "net up fail, ret=%d!\n", ret); return ret; } kinfo = &h->kinfo; - for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { - netdev_set_prio_tc_map(netdev, i, - kinfo->prio_tc[i]); - } + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) + netdev_set_prio_tc_map(netdev, i, kinfo->prio_tc[i]); if (h->ae_algo->ops->set_timer_task) h->ae_algo->ops->set_timer_task(priv->ae_handle, true); @@ -447,6 +462,20 @@ static int hns3_nic_net_open(struct net_device *netdev) return 0; } +static void hns3_reset_tx_queue(struct hnae3_handle *h) +{ + struct net_device *ndev = h->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct netdev_queue *dev_queue; + u32 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) { + dev_queue = netdev_get_tx_queue(ndev, + priv->ring_data[i].queue_index); + netdev_tx_reset_queue(dev_queue); + } +} + static void hns3_nic_net_down(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -467,10 +496,19 @@ static void hns3_nic_net_down(struct net_device *netdev) if (ops->stop) ops->stop(priv->ae_handle); + hns3_free_rx_cpu_rmap(netdev); + /* free irq resources */ hns3_nic_uninit_irq(priv); - hns3_clear_all_ring(priv->ae_handle); + /* delay ring buffer clearing to hns3_reset_notify_uninit_enet + * during reset process, because driver may not be able + * to disable the ring through firmware when downing the netdev. 
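+	 * Clearing a ring the hardware may still own could let the device
+	 * DMA into freed buffers; the deferred path runs only after the
+	 * reset has quiesced the hardware (editor's note).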
+ */ + if (!hns3_nic_resetting(netdev)) + hns3_clear_all_ring(priv->ae_handle, false); + + hns3_reset_tx_queue(priv->ae_handle); } static int hns3_nic_net_stop(struct net_device *netdev) @@ -641,7 +679,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, if (l3.v4->version == 4) l3.v4->check = 0; - /* tunnel packet.*/ + /* tunnel packet */ if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_UDP_TUNNEL | @@ -666,11 +704,11 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, l3.v4->check = 0; } - /* normal or tunnel packet*/ + /* normal or tunnel packet */ l4_offset = l4.hdr - skb->data; hdr_len = (l4.tcp->doff << 2) + l4_offset; - /* remove payload length from inner pseudo checksum when tso*/ + /* remove payload length from inner pseudo checksum when tso */ l4_paylen = skb->len - l4_offset; csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(l4_paylen)); @@ -778,7 +816,7 @@ static void hns3_set_outer_l2l3l4(struct sk_buff *skb, u8 ol4_proto, hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_S, l3_len >> 2); il2_hdr = skb_inner_mac_header(skb); - /* compute OL4 header size, defined in 4 Bytes. */ + /* compute OL4 header size, defined in 4 Bytes */ l4_len = il2_hdr - l4.hdr; hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L4LEN_S, l4_len >> 2); @@ -913,8 +951,9 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end) { /* Config bd buffer end */ - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end); - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1); + if (!!frag_end) + hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U); + hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U); } static int hns3_fill_desc_vtags(struct sk_buff *skb, @@ -988,7 +1027,8 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb, } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, - int size, int frag_end, enum hns_desc_type type) + unsigned int size, int frag_end, + enum hns_desc_type type) { struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct hns3_desc *desc = &ring->desc[ring->next_to_use]; @@ -1038,8 +1078,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, /* Set txbd */ desc->tx.ol_type_vlan_len_msec = cpu_to_le32(ol_type_vlan_len_msec); - desc->tx.type_cs_vlan_tso_len = - cpu_to_le32(type_cs_vlan_tso); + desc->tx.type_cs_vlan_tso_len = cpu_to_le32(type_cs_vlan_tso); desc->tx.paylen = cpu_to_le32(paylen); desc->tx.mss = cpu_to_le16(mss); desc->tx.vlan_tag = cpu_to_le16(inner_vtag); @@ -1086,19 +1125,19 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc_cb->priv = priv; desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; desc_cb->type = (type == DESC_TYPE_SKB && !k) ? - DESC_TYPE_SKB : DESC_TYPE_PAGE; + DESC_TYPE_SKB : DESC_TYPE_PAGE; /* now, fill the descriptor */ desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ? - (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); + (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end && (k == frag_buf_num - 1) ? 
1 : 0); desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri); - /* move ring pointer to next.*/ + /* move ring pointer to next */ ring_ptr_move_fw(ring, next_to_use); desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -1452,12 +1491,10 @@ static void hns3_nic_get_stats64(struct net_device *netdev, start = u64_stats_fetch_begin_irq(&ring->syncp); rx_bytes += ring->stats.rx_bytes; rx_pkts += ring->stats.rx_pkts; - rx_drop += ring->stats.non_vld_descs; rx_drop += ring->stats.l2_err; - rx_errors += ring->stats.non_vld_descs; rx_errors += ring->stats.l2_err; + rx_errors += ring->stats.l3l4_csum_err; rx_crc_errors += ring->stats.l2_err; - rx_crc_errors += ring->stats.l3l4_csum_err; rx_multicast += ring->stats.rx_multicast; rx_length_errors += ring->stats.err_pkt_len; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); @@ -1493,12 +1530,12 @@ static void hns3_nic_get_stats64(struct net_device *netdev, static int hns3_setup_tc(struct net_device *netdev, void *type_data) { struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; - struct hnae3_handle *h = hns3_get_handle(netdev); - struct hnae3_knic_private_info *kinfo = &h->kinfo; u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map; + struct hnae3_knic_private_info *kinfo; u8 tc = mqprio_qopt->qopt.num_tc; u16 mode = mqprio_qopt->mode; u8 hw = mqprio_qopt->qopt.hw; + struct hnae3_handle *h; if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS && mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0))) @@ -1510,6 +1547,9 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data) if (!netdev) return -EINVAL; + h = hns3_get_handle(netdev); + kinfo = &h->kinfo; + return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP; } @@ -1527,15 +1567,11 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct hnae3_handle *h = hns3_get_handle(netdev); - struct hns3_nic_priv *priv = netdev_priv(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false); - if (!ret) - set_bit(vid, priv->active_vlans); - return ret; } @@ -1543,33 +1579,11 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct hnae3_handle *h = hns3_get_handle(netdev); - struct hns3_nic_priv *priv = netdev_priv(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true); - if (!ret) - clear_bit(vid, priv->active_vlans); - - return ret; -} - -static int hns3_restore_vlan(struct net_device *netdev) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - int ret = 0; - u16 vid; - - for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) { - ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid); - if (ret) { - netdev_err(netdev, "Restore vlan: %d filter, ret:%d\n", - vid, ret); - return ret; - } - } - return ret; } @@ -1581,7 +1595,7 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, if (h->ae_algo->ops->set_vf_vlan_filter) ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan, - qos, vlan_proto); + qos, vlan_proto); return ret; } @@ -1722,6 +1736,32 @@ static void hns3_nic_net_timeout(struct net_device *ndev) h->ae_algo->ops->reset_event(h->pdev, h); } +#ifdef CONFIG_RFS_ACCEL +static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct hnae3_handle *h = hns3_get_handle(dev); + struct flow_keys fkeys; + + if (!h->ae_algo->ops->add_arfs_entry) + return 
-EOPNOTSUPP; + + if (skb->encapsulation) + return -EPROTONOSUPPORT; + + if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0)) + return -EPROTONOSUPPORT; + + if ((fkeys.basic.n_proto != htons(ETH_P_IP) && + fkeys.basic.n_proto != htons(ETH_P_IPV6)) || + (fkeys.basic.ip_proto != IPPROTO_TCP && + fkeys.basic.ip_proto != IPPROTO_UDP)) + return -EPROTONOSUPPORT; + + return h->ae_algo->ops->add_arfs_entry(h, rxq_index, flow_id, &fkeys); +} +#endif + static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_open = hns3_nic_net_open, .ndo_stop = hns3_nic_net_stop, @@ -1737,6 +1777,10 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_vlan_rx_add_vid = hns3_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = hns3_vlan_rx_kill_vid, .ndo_set_vf_vlan = hns3_ndo_set_vf_vlan, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = hns3_rx_flow_steer, +#endif + }; bool hns3_is_phys_func(struct pci_dev *pdev) @@ -1802,8 +1846,7 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct hnae3_ae_dev *ae_dev; int ret; - ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), - GFP_KERNEL); + ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), GFP_KERNEL); if (!ae_dev) { ret = -ENOMEM; return ret; @@ -1811,7 +1854,6 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ae_dev->pdev = pdev; ae_dev->flag = ent->driver_data; - ae_dev->dev_type = HNAE3_DEV_KNIC; ae_dev->reset_type = HNAE3_NONE_RESET; hns3_get_dev_capability(pdev, ae_dev); pci_set_drvdata(pdev, ae_dev); @@ -1895,9 +1937,9 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; - if (!ae_dev) { + if (!ae_dev || !ae_dev->ops) { dev_err(&pdev->dev, - "Can't recover - error happened during device init\n"); + "Can't recover - error happened before device initialized\n"); return PCI_ERS_RESULT_NONE; } @@ -1912,14 +1954,23 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + const struct hnae3_ae_ops *ops; + enum hnae3_reset_type reset_type; struct device *dev = &pdev->dev; - dev_info(dev, "requesting reset due to PCI error\n"); + if (!ae_dev || !ae_dev->ops) + return PCI_ERS_RESULT_NONE; + ops = ae_dev->ops; /* request the reset */ - if (ae_dev->ops->reset_event) { - if (!ae_dev->override_pci_need_reset) - ae_dev->ops->reset_event(pdev, NULL); + if (ops->reset_event) { + if (ae_dev->hw_err_reset_req) { + reset_type = ops->get_reset_level(ae_dev, + &ae_dev->hw_err_reset_req); + ops->set_default_reset_request(ae_dev, reset_type); + dev_info(dev, "requesting reset due to PCI error\n"); + ops->reset_event(pdev, NULL); + } return PCI_ERS_RESULT_RECOVERED; } @@ -2168,7 +2219,7 @@ out_buffer_fail: return ret; } -/* detach a in-used buffer and replace with a reserved one */ +/* detach a in-used buffer and replace with a reserved one */ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, struct hns3_desc_cb *res_cb) { @@ -2181,8 +2232,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) { ring->desc_cb[i].reuse_flag = 0; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma - + ring->desc_cb[i].page_offset); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; } @@ -2284,8 +2335,8 @@ static int hns3_desc_unused(struct 
hns3_enet_ring *ring) return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu; } -static void -hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count) +static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, + int cleand_count) { struct hns3_desc_cb *desc_cb; struct hns3_desc_cb res_cbs; @@ -2338,7 +2389,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, /* Avoid re-using remote pages, or the stack is still using the page * when page_offset rollback to zero, flag default unreuse */ - if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()) || + if (unlikely(page_to_nid(desc_cb->priv) != numa_mem_id()) || (!desc_cb->page_offset && page_count(desc_cb->priv) > 1)) return; @@ -2347,7 +2398,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, if (desc_cb->page_offset + truesize <= hnae3_page_size(ring)) { desc_cb->reuse_flag = 1; - /* Bump ref count on page before it is given*/ + /* Bump ref count on page before it is given */ get_page(desc_cb->priv); } else if (page_count(desc_cb->priv) == 1) { desc_cb->reuse_flag = 1; @@ -2356,13 +2407,13 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, } } -static int hns3_gro_complete(struct sk_buff *skb) +static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) { __be16 type = skb->protocol; struct tcphdr *th; int depth = 0; - while (type == htons(ETH_P_8021Q)) { + while (eth_type_vlan(type)) { struct vlan_hdr *vh; if ((depth + VLAN_HLEN) > skb_headlen(skb)) @@ -2373,10 +2424,24 @@ static int hns3_gro_complete(struct sk_buff *skb) depth += VLAN_HLEN; } + skb_set_network_header(skb, depth); + if (type == htons(ETH_P_IP)) { + const struct iphdr *iph = ip_hdr(skb); + depth += sizeof(struct iphdr); + skb_set_transport_header(skb, depth); + th = tcp_hdr(skb); + th->check = ~tcp_v4_check(skb->len - depth, iph->saddr, + iph->daddr, 0); } else if (type == htons(ETH_P_IPV6)) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + depth += sizeof(struct ipv6hdr); + skb_set_transport_header(skb, depth); + th = tcp_hdr(skb); + th->check = ~tcp_v6_check(skb->len - depth, &iph->saddr, + &iph->daddr, 0); } else { netdev_err(skb->dev, "Error: FW GRO supports only IPv4/IPv6, not 0x%04x, depth: %d\n", @@ -2384,13 +2449,16 @@ static int hns3_gro_complete(struct sk_buff *skb) return -EFAULT; } - th = (struct tcphdr *)(skb->data + depth); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (l234info & BIT(HNS3_RXD_GRO_FIXID_B)) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + skb->csum_start = (unsigned char *)th - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -2508,7 +2576,7 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, } } -static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length, +static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, unsigned char *va) { #define HNS3_NEED_ADD_FRAG 1 @@ -2537,7 +2605,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length, memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); /* We can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) + if (likely(page_to_nid(desc_cb->priv) == numa_mem_id())) desc_cb->reuse_flag = 1; else /* This page cannot be reused so discard it */ put_page(desc_cb->priv); @@ -2574,7 +2642,7 @@ static int hns3_add_frag(struct hns3_enet_ring 
*ring, struct hns3_desc *desc, */ if (pending) { pre_bd = (ring->next_to_clean - 1 + ring->desc_num) % - ring->desc_num; + ring->desc_num; pre_desc = &ring->desc[pre_bd]; bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info); } else { @@ -2628,21 +2696,22 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, u32 l234info, u32 bd_base_info, u32 ol_info) { - u16 gro_count; u32 l3_type; - gro_count = hnae3_get_field(l234info, HNS3_RXD_GRO_COUNT_M, - HNS3_RXD_GRO_COUNT_S); + skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info, + HNS3_RXD_GRO_SIZE_M, + HNS3_RXD_GRO_SIZE_S); /* if there is no HW GRO, do not set gro params */ - if (!gro_count) { + if (!skb_shinfo(skb)->gso_size) { hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info); return 0; } - NAPI_GRO_CB(skb)->count = gro_count; + NAPI_GRO_CB(skb)->count = hnae3_get_field(l234info, + HNS3_RXD_GRO_COUNT_M, + HNS3_RXD_GRO_COUNT_S); - l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, - HNS3_RXD_L3ID_S); + l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S); if (l3_type == HNS3_L3_TYPE_IPV4) skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; else if (l3_type == HNS3_L3_TYPE_IPV6) @@ -2650,11 +2719,7 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, else return -EFAULT; - skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info, - HNS3_RXD_GRO_SIZE_M, - HNS3_RXD_GRO_SIZE_S); - - return hns3_gro_complete(skb); + return hns3_gro_complete(skb, l234info); } static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring, @@ -2703,14 +2768,6 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb) vlan_tag); } - if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) { - u64_stats_update_begin(&ring->syncp); - ring->stats.non_vld_descs++; - u64_stats_update_end(&ring->syncp); - - return -EINVAL; - } - if (unlikely(!desc->rx.pkt_len || (l234info & (BIT(HNS3_RXD_TRUNCAT_B) | BIT(HNS3_RXD_L2E_B))))) { u64_stats_update_begin(&ring->syncp); @@ -2762,8 +2819,8 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, struct sk_buff *skb = ring->skb; struct hns3_desc_cb *desc_cb; struct hns3_desc *desc; + unsigned int length; u32 bd_base_info; - int length; int ret; desc = &ring->desc[ring->next_to_clean]; @@ -2828,14 +2885,14 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, return ret; } + skb_record_rx_queue(skb, ring->tqp->tqp_index); *out_skb = skb; return 0; } -int hns3_clean_rx_ring( - struct hns3_enet_ring *ring, int budget, - void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)) +int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, + void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)) { #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 int recv_pkts, recv_bds, clean_count, err; @@ -2887,42 +2944,25 @@ int hns3_clean_rx_ring( out: /* Make all data has been write before submit */ if (clean_count + unused_count > 0) - hns3_nic_alloc_rx_buffers(ring, - clean_count + unused_count); + hns3_nic_alloc_rx_buffers(ring, clean_count + unused_count); return recv_pkts; } -static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) +static bool hns3_get_new_flow_lvl(struct hns3_enet_ring_group *ring_group) { - struct hns3_enet_tqp_vector *tqp_vector = - ring_group->ring->tqp_vector; +#define HNS3_RX_LOW_BYTE_RATE 10000 +#define HNS3_RX_MID_BYTE_RATE 20000 +#define HNS3_RX_ULTRA_PACKET_RATE 40 + enum hns3_flow_level_range new_flow_level; - int packets_per_msecs; - int bytes_per_msecs; + struct hns3_enet_tqp_vector 
*tqp_vector; + int packets_per_msecs, bytes_per_msecs; u32 time_passed_ms; - u16 new_int_gl; - - if (!tqp_vector->last_jiffies) - return false; - - if (ring_group->total_packets == 0) { - ring_group->coal.int_gl = HNS3_INT_GL_50K; - ring_group->coal.flow_level = HNS3_FLOW_LOW; - return true; - } - /* Simple throttlerate management - * 0-10MB/s lower (50000 ints/s) - * 10-20MB/s middle (20000 ints/s) - * 20-1249MB/s high (18000 ints/s) - * > 40000pps ultra (8000 ints/s) - */ - new_flow_level = ring_group->coal.flow_level; - new_int_gl = ring_group->coal.int_gl; + tqp_vector = ring_group->ring->tqp_vector; time_passed_ms = jiffies_to_msecs(jiffies - tqp_vector->last_jiffies); - if (!time_passed_ms) return false; @@ -2932,9 +2972,14 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) do_div(ring_group->total_bytes, time_passed_ms); bytes_per_msecs = ring_group->total_bytes; -#define HNS3_RX_LOW_BYTE_RATE 10000 -#define HNS3_RX_MID_BYTE_RATE 20000 + new_flow_level = ring_group->coal.flow_level; + /* Simple throttlerate management + * 0-10MB/s lower (50000 ints/s) + * 10-20MB/s middle (20000 ints/s) + * 20-1249MB/s high (18000 ints/s) + * > 40000pps ultra (8000 ints/s) + */ switch (new_flow_level) { case HNS3_FLOW_LOW: if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE) @@ -2954,13 +2999,40 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) break; } -#define HNS3_RX_ULTRA_PACKET_RATE 40 - if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE && &tqp_vector->rx_group == ring_group) new_flow_level = HNS3_FLOW_ULTRA; - switch (new_flow_level) { + ring_group->total_bytes = 0; + ring_group->total_packets = 0; + ring_group->coal.flow_level = new_flow_level; + + return true; +} + +static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) +{ + struct hns3_enet_tqp_vector *tqp_vector; + u16 new_int_gl; + + if (!ring_group->ring) + return false; + + tqp_vector = ring_group->ring->tqp_vector; + if (!tqp_vector->last_jiffies) + return false; + + if (ring_group->total_packets == 0) { + ring_group->coal.int_gl = HNS3_INT_GL_50K; + ring_group->coal.flow_level = HNS3_FLOW_LOW; + return true; + } + + if (!hns3_get_new_flow_lvl(ring_group)) + return false; + + new_int_gl = ring_group->coal.int_gl; + switch (ring_group->coal.flow_level) { case HNS3_FLOW_LOW: new_int_gl = HNS3_INT_GL_50K; break; @@ -2977,9 +3049,6 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) break; } - ring_group->total_bytes = 0; - ring_group->total_packets = 0; - ring_group->coal.flow_level = new_flow_level; if (new_int_gl != ring_group->coal.int_gl) { ring_group->coal.int_gl = new_int_gl; return true; @@ -3280,6 +3349,7 @@ static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv) if (!vector) return -ENOMEM; + /* save the actual available vector number */ vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector); priv->vector_num = vector_num; @@ -3331,8 +3401,6 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain); if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) { - irq_set_affinity_notifier(tqp_vector->vector_irq, - NULL); irq_set_affinity_hint(tqp_vector->vector_irq, NULL); free_irq(tqp_vector->vector_irq, tqp_vector); tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED; @@ -3364,7 +3432,7 @@ static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) } static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, - int ring_type) + 
unsigned int ring_type) { struct hns3_nic_ring_data *ring_data = priv->ring_data; int queue_num = priv->ae_handle->kinfo.num_tqps; @@ -3550,8 +3618,7 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring) struct hnae3_queue *q = ring->tqp; if (!HNAE3_IS_TX_RING(ring)) { - hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG, - (u32)dma); + hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG, (u32)dma); hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_H_REG, (u32)((dma >> 31) >> 1)); @@ -3851,6 +3918,8 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_client_stop(handle); + hns3_uninit_phy(netdev); + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); goto out_netdev_free; @@ -3858,9 +3927,7 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_del_all_fd_rules(netdev, true); - hns3_force_clear_all_rx_ring(handle); - - hns3_uninit_phy(netdev); + hns3_clear_all_ring(handle, true); hns3_nic_uninit_vector_data(priv); @@ -3997,8 +4064,7 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring) ret); return ret; } - hns3_replace_buffer(ring, ring->next_to_use, - &res_cbs); + hns3_replace_buffer(ring, ring->next_to_use, &res_cbs); } ring_ptr_move_fw(ring, next_to_use); } @@ -4030,40 +4096,26 @@ static void hns3_force_clear_rx_ring(struct hns3_enet_ring *ring) } } -static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h) +static void hns3_clear_all_ring(struct hnae3_handle *h, bool force) { struct net_device *ndev = h->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hns3_enet_ring *ring; u32 i; for (i = 0; i < h->kinfo.num_tqps; i++) { - ring = priv->ring_data[i + h->kinfo.num_tqps].ring; - hns3_force_clear_rx_ring(ring); - } -} - -static void hns3_clear_all_ring(struct hnae3_handle *h) -{ - struct net_device *ndev = h->kinfo.netdev; - struct hns3_nic_priv *priv = netdev_priv(ndev); - u32 i; - - for (i = 0; i < h->kinfo.num_tqps; i++) { - struct netdev_queue *dev_queue; struct hns3_enet_ring *ring; ring = priv->ring_data[i].ring; hns3_clear_tx_ring(ring); - dev_queue = netdev_get_tx_queue(ndev, - priv->ring_data[i].queue_index); - netdev_tx_reset_queue(dev_queue); ring = priv->ring_data[i + h->kinfo.num_tqps].ring; /* Continue to clear other rings even if clearing some * rings failed. */ - hns3_clear_rx_ring(ring); + if (force) + hns3_force_clear_rx_ring(ring); + else + hns3_clear_rx_ring(ring); } } @@ -4173,7 +4225,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle) if (ret) { set_bit(HNS3_NIC_STATE_RESETTING, &priv->state); netdev_err(kinfo->netdev, - "hns net up fail, ret=%d!\n", ret); + "net up fail, ret=%d!\n", ret); return ret; } } @@ -4251,12 +4303,8 @@ static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle) vlan_filter_enable = netdev->flags & IFF_PROMISC ? 
false : true; hns3_enable_vlan_filter(netdev, vlan_filter_enable); - /* Hardware table is only clear when pf resets */ - if (!(handle->flags & HNAE3_SUPPORT_VF)) { - ret = hns3_restore_vlan(netdev); - if (ret) - return ret; - } + if (handle->ae_algo->ops->restore_vlan_table) + handle->ae_algo->ops->restore_vlan_table(handle); return hns3_restore_fd_rules(netdev); } @@ -4272,7 +4320,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) return 0; } - hns3_force_clear_all_rx_ring(handle); + hns3_clear_all_ring(handle, true); + hns3_reset_tx_queue(priv->ae_handle); hns3_nic_uninit_vector_data(priv); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index c14480f9b625..848b866761df 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -145,7 +145,7 @@ enum hns3_nic_state { #define HNS3_RXD_TSIND_M (0x7 << HNS3_RXD_TSIND_S) #define HNS3_RXD_LKBK_B 15 #define HNS3_RXD_GRO_SIZE_S 16 -#define HNS3_RXD_GRO_SIZE_M (0x3ff << HNS3_RXD_GRO_SIZE_S) +#define HNS3_RXD_GRO_SIZE_M (0x3fff << HNS3_RXD_GRO_SIZE_S) #define HNS3_TXD_L3T_S 0 #define HNS3_TXD_L3T_M (0x3 << HNS3_TXD_L3T_S) @@ -384,7 +384,6 @@ struct ring_stats { u64 rx_err_cnt; u64 reuse_pg_cnt; u64 err_pkt_len; - u64 non_vld_descs; u64 err_bd_num; u64 l2_err; u64 l3l4_csum_err; @@ -417,7 +416,7 @@ struct hns3_enet_ring { */ int next_to_clean; - int pull_len; /* head length for current packet */ + u32 pull_len; /* head length for current packet */ u32 frag_num; unsigned char *va; /* first buffer address for current packet */ @@ -446,25 +445,6 @@ enum hns3_flow_level_range { HNS3_FLOW_ULTRA = 3, }; -enum hns3_link_mode_bits { - HNS3_LM_FIBRE_BIT = BIT(0), - HNS3_LM_AUTONEG_BIT = BIT(1), - HNS3_LM_TP_BIT = BIT(2), - HNS3_LM_PAUSE_BIT = BIT(3), - HNS3_LM_BACKPLANE_BIT = BIT(4), - HNS3_LM_10BASET_HALF_BIT = BIT(5), - HNS3_LM_10BASET_FULL_BIT = BIT(6), - HNS3_LM_100BASET_HALF_BIT = BIT(7), - HNS3_LM_100BASET_FULL_BIT = BIT(8), - HNS3_LM_1000BASET_FULL_BIT = BIT(9), - HNS3_LM_10000BASEKR_FULL_BIT = BIT(10), - HNS3_LM_25000BASEKR_FULL_BIT = BIT(11), - HNS3_LM_40000BASELR4_FULL_BIT = BIT(12), - HNS3_LM_50000BASEKR2_FULL_BIT = BIT(13), - HNS3_LM_100000BASEKR4_FULL_BIT = BIT(14), - HNS3_LM_COUNT = 15 -}; - #define HNS3_INT_GL_MAX 0x1FE0 #define HNS3_INT_GL_50K 0x0014 #define HNS3_INT_GL_20K 0x0032 @@ -550,7 +530,6 @@ struct hns3_nic_priv { struct notifier_block notifier_block; /* Vxlan/Geneve information */ struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX]; - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct hns3_enet_coalesce tx_coal; struct hns3_enet_coalesce rx_coal; }; @@ -631,7 +610,7 @@ static inline bool hns3_nic_resetting(struct net_device *netdev) #define hnae3_buf_size(_ring) ((_ring)->buf_size) #define hnae3_page_order(_ring) (get_order(hnae3_buf_size(_ring))) -#define hnae3_page_size(_ring) (PAGE_SIZE << hnae3_page_order(_ring)) +#define hnae3_page_size(_ring) (PAGE_SIZE << (u32)hnae3_page_order(_ring)) /* iterator for handling rings in ring group */ #define hns3_for_each_ring(pos, head) \ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index d1588ea6132c..5bff98a9b0dc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -44,7 +44,6 @@ static const struct hns3_stats hns3_rxq_stats[] = { HNS3_TQP_STAT("errors", rx_err_cnt), HNS3_TQP_STAT("reuse_pg_cnt", 
reuse_pg_cnt), HNS3_TQP_STAT("err_pkt_len", err_pkt_len), - HNS3_TQP_STAT("non_vld_descs", non_vld_descs), HNS3_TQP_STAT("err_bd_num", err_bd_num), HNS3_TQP_STAT("l2_err", l2_err), HNS3_TQP_STAT("l3l4_csum_err", l3l4_csum_err), @@ -60,6 +59,7 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_PKT_NUM 1 #define HNS3_NIC_LB_TEST_RING_ID 0 #define HNS3_NIC_LB_TEST_PACKET_SIZE 128 +#define HNS3_NIC_LB_SETUP_USEC 10000 /* Nic loopback test err */ #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 @@ -117,7 +117,7 @@ static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode) return ret; ret = hns3_lp_setup(ndev, loop_mode, true); - usleep_range(10000, 20000); + usleep_range(HNS3_NIC_LB_SETUP_USEC, HNS3_NIC_LB_SETUP_USEC * 2); return ret; } @@ -132,7 +132,7 @@ static int hns3_lp_down(struct net_device *ndev, enum hnae3_loop loop_mode) return ret; } - usleep_range(10000, 20000); + usleep_range(HNS3_NIC_LB_SETUP_USEC, HNS3_NIC_LB_SETUP_USEC * 2); return 0; } @@ -149,6 +149,12 @@ static void hns3_lp_setup_skb(struct sk_buff *skb) packet = skb_put(skb, HNS3_NIC_LB_TEST_PACKET_SIZE); memcpy(ethh->h_dest, ndev->dev_addr, ETH_ALEN); + + /* The dst mac addr of loopback packet is the same as the host' + * mac addr, the SSU component may loop back the packet to host + * before the packet reaches mac or serdes, which will defect + * the purpose of mac or serdes selftest. + */ ethh->h_dest[5] += 0x1f; eth_zero_addr(ethh->h_source); ethh->h_proto = htons(ETH_P_ARP); @@ -243,11 +249,13 @@ static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) skb_get(skb); tx_ret = hns3_nic_net_xmit(skb, ndev); - if (tx_ret == NETDEV_TX_OK) + if (tx_ret == NETDEV_TX_OK) { good_cnt++; - else + } else { + kfree_skb(skb); netdev_err(ndev, "hns3_lb_run_test xmit failed: %d\n", tx_ret); + } } if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) { ret_val = HNS3_NIC_LB_TEST_TX_CNT_ERR; @@ -327,6 +335,13 @@ static void hns3_self_test(struct net_device *ndev, h->ae_algo->ops->enable_vlan_filter(h, false); #endif + /* Tell firmware to stop mac autoneg before loopback test start, + * otherwise loopback test may be failed when the port is still + * negotiating. 
+ */ + if (h->ae_algo->ops->halt_autoneg) + h->ae_algo->ops->halt_autoneg(h, true); + set_bit(HNS3_NIC_STATE_TESTING, &priv->state); for (i = 0; i < HNS3_SELF_TEST_TYPE_NUM; i++) { @@ -349,6 +364,9 @@ static void hns3_self_test(struct net_device *ndev, clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); + if (h->ae_algo->ops->halt_autoneg) + h->ae_algo->ops->halt_autoneg(h, false); + #if IS_ENABLED(CONFIG_VLAN_8021Q) if (dis_vlan_filter) h->ae_algo->ops->enable_vlan_filter(h, true); @@ -435,7 +453,7 @@ static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_STATS: buff = hns3_get_strings_tqps(h, buff); - h->ae_algo->ops->get_strings(h, stringset, (u8 *)buff); + ops->get_strings(h, stringset, (u8 *)buff); break; case ETH_SS_TEST: ops->get_strings(h, stringset, data); @@ -510,6 +528,11 @@ static void hns3_get_drvinfo(struct net_device *netdev, struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; + if (!h->ae_algo->ops->get_fw_version) { + netdev_err(netdev, "could not get fw version!\n"); + return; + } + strncpy(drvinfo->version, hns3_driver_version, sizeof(drvinfo->version)); drvinfo->version[sizeof(drvinfo->version) - 1] = '\0'; @@ -530,7 +553,7 @@ static u32 hns3_get_link(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status) + if (h->ae_algo->ops->get_status) return h->ae_algo->ops->get_status(h); else return 0; @@ -560,7 +583,7 @@ static void hns3_get_pauseparam(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam) + if (h->ae_algo->ops->get_pauseparam) h->ae_algo->ops->get_pauseparam(h, &param->autoneg, &param->rx_pause, &param->tx_pause); } @@ -610,9 +633,6 @@ static int hns3_get_link_ksettings(struct net_device *netdev, u8 media_type; u8 link_stat; - if (!h->ae_algo || !h->ae_algo->ops) - return -EOPNOTSUPP; - ops = h->ae_algo->ops; if (ops->get_media_type) ops->get_media_type(h, &media_type, &module_type); @@ -740,8 +760,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || - !h->ae_algo->ops->get_rss_key_size) + if (!h->ae_algo->ops->get_rss_key_size) return 0; return h->ae_algo->ops->get_rss_key_size(h); @@ -751,8 +770,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || - !h->ae_algo->ops->get_rss_indir_size) + if (!h->ae_algo->ops->get_rss_indir_size) return 0; return h->ae_algo->ops->get_rss_indir_size(h); @@ -763,7 +781,7 @@ static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss) + if (!h->ae_algo->ops->get_rss) return -EOPNOTSUPP; return h->ae_algo->ops->get_rss(h, indir, key, hfunc); @@ -774,7 +792,7 @@ static int hns3_set_rss(struct net_device *netdev, const u32 *indir, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss) + if (!h->ae_algo->ops->set_rss) return -EOPNOTSUPP; if ((h->pdev->revision == 0x20 && @@ -799,9 +817,6 @@ static int hns3_get_rxnfc(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops) - return -EOPNOTSUPP; - switch
(cmd->cmd) { case ETHTOOL_GRXRINGS: cmd->data = h->kinfo.num_tqps; @@ -915,9 +930,6 @@ static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops) - return -EOPNOTSUPP; - switch (cmd->cmd) { case ETHTOOL_SRXFH: if (h->ae_algo->ops->set_rss_tuple) @@ -1193,7 +1205,7 @@ static int hns3_set_phys_id(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_led_id) + if (!h->ae_algo->ops->set_led_id) return -EOPNOTSUPP; return h->ae_algo->ops->set_led_id(h, state); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index fbd904e3077c..22f6acd45d9a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -110,8 +110,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring) hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG, upper_32_bits(dma)); hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG, - (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) | - HCLGE_NIC_CMQ_ENABLE); + ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S); hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0); hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0); } else { @@ -120,8 +119,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring) hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG, upper_32_bits(dma)); hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG, - (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) | - HCLGE_NIC_CMQ_ENABLE); + ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S); hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0); hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0); } @@ -175,7 +173,11 @@ static bool hclge_is_special_opcode(u16 opcode) HCLGE_OPC_STATS_MAC, HCLGE_OPC_STATS_MAC_ALL, HCLGE_OPC_QUERY_32_BIT_REG, - HCLGE_OPC_QUERY_64_BIT_REG}; + HCLGE_OPC_QUERY_64_BIT_REG, + HCLGE_QUERY_CLEAR_MPF_RAS_INT, + HCLGE_QUERY_CLEAR_PF_RAS_INT, + HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, + HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT}; int i; for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) { @@ -186,12 +188,43 @@ static bool hclge_is_special_opcode(u16 opcode) return false; } +static int hclge_cmd_convert_err_code(u16 desc_ret) +{ + switch (desc_ret) { + case HCLGE_CMD_EXEC_SUCCESS: + return 0; + case HCLGE_CMD_NO_AUTH: + return -EPERM; + case HCLGE_CMD_NOT_SUPPORTED: + return -EOPNOTSUPP; + case HCLGE_CMD_QUEUE_FULL: + return -EXFULL; + case HCLGE_CMD_NEXT_ERR: + return -ENOSR; + case HCLGE_CMD_UNEXE_ERR: + return -ENOTBLK; + case HCLGE_CMD_PARA_ERR: + return -EINVAL; + case HCLGE_CMD_RESULT_ERR: + return -ERANGE; + case HCLGE_CMD_TIMEOUT: + return -ETIME; + case HCLGE_CMD_HILINK_ERR: + return -ENOLINK; + case HCLGE_CMD_QUEUE_ILLEGAL: + return -ENXIO; + case HCLGE_CMD_INVALID: + return -EBADR; + default: + return -EIO; + } +} + static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, int num, int ntc) { u16 opcode, desc_ret; int handle; - int retval; opcode = le16_to_cpu(desc[0].opcode); for (handle = 0; handle < num; handle++) { @@ -205,17 +238,9 @@ static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, else desc_ret = le16_to_cpu(desc[0].retval); - if (desc_ret == HCLGE_CMD_EXEC_SUCCESS) - retval = 0; - else if (desc_ret == HCLGE_CMD_NO_AUTH) - retval = -EPERM; - else if (desc_ret == HCLGE_CMD_NOT_SUPPORTED) - retval = -EOPNOTSUPP; - else - retval = -EIO; hw->cmq.last_status = 
desc_ret; - return retval; + return hclge_cmd_convert_err_code(desc_ret); } /** @@ -230,6 +255,7 @@ static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) { struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw); + struct hclge_cmq_ring *csq = &hw->cmq.csq; struct hclge_desc *desc_to_use; bool complete = false; u32 timeout = 0; @@ -239,8 +265,16 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) spin_lock_bh(&hw->cmq.csq.lock); - if (num > hclge_ring_space(&hw->cmq.csq) || - test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { + if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { + spin_unlock_bh(&hw->cmq.csq.lock); + return -EBUSY; + } + + if (num > hclge_ring_space(&hw->cmq.csq)) { + /* If CMDQ ring is full, SW HEAD and HW HEAD may be different, + * need update the SW HEAD pointer csq->next_to_clean + */ + csq->next_to_clean = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG); spin_unlock_bh(&hw->cmq.csq.lock); return -EBUSY; } @@ -278,7 +312,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) } if (!complete) { - retval = -EAGAIN; + retval = -EBADE; } else { retval = hclge_cmd_check_retval(hw, desc, num, ntc); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index d79a209b80f6..96840d8f3e24 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -41,6 +41,14 @@ enum hclge_cmd_return_status { HCLGE_CMD_NO_AUTH = 1, HCLGE_CMD_NOT_SUPPORTED = 2, HCLGE_CMD_QUEUE_FULL = 3, + HCLGE_CMD_NEXT_ERR = 4, + HCLGE_CMD_UNEXE_ERR = 5, + HCLGE_CMD_PARA_ERR = 6, + HCLGE_CMD_RESULT_ERR = 7, + HCLGE_CMD_TIMEOUT = 8, + HCLGE_CMD_HILINK_ERR = 9, + HCLGE_CMD_QUEUE_ILLEGAL = 10, + HCLGE_CMD_INVALID = 11, }; enum hclge_cmd_status { @@ -180,6 +188,9 @@ enum hclge_opcode_type { HCLGE_OPC_CFG_COM_TQP_QUEUE = 0x0B20, HCLGE_OPC_RESET_TQP_QUEUE = 0x0B22, + /* PPU commands */ + HCLGE_OPC_PPU_PF_OTHER_INT_DFX = 0x0B4A, + /* TSO command */ HCLGE_OPC_TSO_GENERIC_CONFIG = 0x0C01, HCLGE_OPC_GRO_GENERIC_CONFIG = 0x0C10, @@ -243,6 +254,9 @@ enum hclge_opcode_type { /* NCL config command */ HCLGE_OPC_QUERY_NCL_CONFIG = 0x7011, + /* M7 stats command */ + HCLGE_OPC_M7_STATS_BD = 0x7012, + HCLGE_OPC_M7_STATS_INFO = 0x7013, /* SFP command */ HCLGE_OPC_GET_SFP_INFO = 0x7104, @@ -265,6 +279,8 @@ enum hclge_opcode_type { HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580, HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581, HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584, + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD = 0x1585, + HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD = 0x1586, HCLGE_IGU_EGU_TNL_INT_EN = 0x1803, HCLGE_IGU_COMMON_INT_EN = 0x1806, HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14, @@ -641,6 +657,11 @@ enum hclge_mac_vlan_tbl_opcode { HCLGE_MAC_VLAN_LKUP, /* Lookup a entry through mac_vlan key */ }; +enum hclge_mac_vlan_add_resp_code { + HCLGE_ADD_UC_OVERFLOW = 2, /* ADD failed for UC overflow */ + HCLGE_ADD_MC_OVERFLOW, /* ADD failed for MC overflow */ +}; + #define HCLGE_MAC_VLAN_BIT0_EN_B 0 #define HCLGE_MAC_VLAN_BIT1_EN_B 1 #define HCLGE_MAC_EPORT_SW_EN_B 12 @@ -674,7 +695,6 @@ struct hclge_umv_spc_alc_cmd { #define HCLGE_MAC_MGR_MASK_VLAN_B BIT(0) #define HCLGE_MAC_MGR_MASK_MAC_B BIT(1) #define HCLGE_MAC_MGR_MASK_ETHERTYPE_B BIT(2) -#define HCLGE_MAC_ETHERTYPE_LLDP 0x88cc struct hclge_mac_mgr_tbl_entry_cmd { u8 flags; @@ -872,7 +892,7 @@ struct hclge_serdes_lb_cmd { #define 
HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */ #define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */ #define HCLGE_DEFAULT_NON_DCB_DV 0x7800 /* 30K byte */ -#define HCLGE_NON_DCB_ADDITIONAL_BUF 0x200 /* 512 byte */ +#define HCLGE_NON_DCB_ADDITIONAL_BUF 0x1400 /* 5120 byte */ #define HCLGE_TYPE_CRQ 0 #define HCLGE_TYPE_CSQ 1 @@ -970,6 +990,25 @@ struct hclge_fd_ad_config_cmd { u8 rsv2[8]; }; +struct hclge_get_m7_bd_cmd { + __le32 bd_num; + u8 rsv[20]; +}; + +struct hclge_query_ppu_pf_other_int_dfx_cmd { + __le16 over_8bd_no_fe_qid; + __le16 over_8bd_no_fe_vf_id; + __le16 tso_mss_cmp_min_err_qid; + __le16 tso_mss_cmp_min_err_vf_id; + __le16 tso_mss_cmp_max_err_qid; + __le16 tso_mss_cmp_max_err_vf_id; + __le16 tx_rd_fbd_poison_qid; + __le16 tx_rd_fbd_poison_vf_id; + __le16 rx_rd_fbd_poison_qid; + __le16 rx_rd_fbd_poison_vf_id; + u8 rsv[4]; +}; + int hclge_cmd_init(struct hclge_dev *hdev); static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 1161361a973b..bac4ce13f6ae 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -325,6 +325,8 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) hdev->tm_info.hw_pfc_map = pfc_map; hdev->tm_info.pfc_en = pfc->pfc_en; + hclge_tm_pfc_info_update(hdev); + return hclge_pause_setup_hw(hdev, false); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index a9ffb57c4607..ab625c757a95 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -61,9 +61,11 @@ static int hclge_dbg_cmd_send(struct hclge_dev *hdev, static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev, struct hclge_dbg_dfx_message *dfx_message, - char *cmd_buf, int msg_num, int offset, - enum hclge_opcode_type cmd) + const char *cmd_buf, int msg_num, + int offset, enum hclge_opcode_type cmd) { +#define BD_DATA_NUM 6 + struct hclge_desc *desc_src; struct hclge_desc *desc; int bd_num, buf_len; @@ -92,14 +94,16 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev, return; } - max = (bd_num * 6) <= msg_num ? (bd_num * 6) : msg_num; + max = (bd_num * BD_DATA_NUM) <= msg_num ? + (bd_num * BD_DATA_NUM) : msg_num; desc = desc_src; for (i = 0; i < max; i++) { - (((i / 6) > 0) && ((i % 6) == 0)) ? desc++ : desc; + ((i > 0) && ((i % BD_DATA_NUM) == 0)) ? 
desc++ : desc; if (dfx_message->flag) dev_info(&hdev->pdev->dev, "%s: 0x%x\n", - dfx_message->message, desc->data[i % 6]); + dfx_message->message, + desc->data[i % BD_DATA_NUM]); dfx_message++; } @@ -107,7 +111,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev, kfree(desc_src); } -static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf) { struct device *dev = &hdev->pdev->dev; struct hclge_dbg_bitmap_cmd *bitmap; @@ -207,7 +211,7 @@ static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, char *cmd_buf) dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[5]); } -static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf) { int msg_num; @@ -395,7 +399,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) if (ret) goto err_tm_pg_cmd_send; - dev_info(&hdev->pdev->dev, "PRI_SCH pg_id: %u\n", desc.data[0]); + dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n", desc.data[0]); cmd = HCLGE_OPC_TM_QS_SCH_MODE_CFG; hclge_cmd_setup_basic_desc(&desc, cmd, true); @@ -403,7 +407,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) if (ret) goto err_tm_pg_cmd_send; - dev_info(&hdev->pdev->dev, "QS_SCH pg_id: %u\n", desc.data[0]); + dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n", desc.data[0]); cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING; hclge_cmd_setup_basic_desc(&desc, cmd, true); @@ -412,9 +416,9 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) goto err_tm_pg_cmd_send; bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data; - dev_info(&hdev->pdev->dev, "BP_TO_QSET pg_id: %u\n", + dev_info(&hdev->pdev->dev, "BP_TO_QSET tc_id: %u\n", bp_to_qs_map_cmd->tc_id); - dev_info(&hdev->pdev->dev, "BP_TO_QSET pg_shapping: 0x%x\n", + dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_group_id: 0x%x\n", bp_to_qs_map_cmd->qs_group_id); dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_bit_map: 0x%x\n", bp_to_qs_map_cmd->qs_bit_map); @@ -473,7 +477,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev) nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data; dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n", nq_to_qs_map->nq_id); - dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: %u\n", + dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: 0x%x\n", nq_to_qs_map->qset_id); cmd = HCLGE_OPC_TM_PG_WEIGHT; @@ -537,7 +541,8 @@ err_tm_cmd_send: cmd, ret); } -static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, + const char *cmd_buf) { struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd; struct hclge_nq_to_qs_link_cmd *nq_to_qs_map; @@ -921,11 +926,67 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) hdev->rst_stats.reset_cnt); } +void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) +{ + struct hclge_desc *desc_src, *desc_tmp; + struct hclge_get_m7_bd_cmd *req; + struct hclge_desc desc; + u32 bd_num, buf_len; + int ret, i; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_STATS_BD, true); + + req = (struct hclge_get_m7_bd_cmd *)desc.data; + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "get firmware statistics bd number failed, ret=%d\n", + ret); + return; + } + + bd_num = le32_to_cpu(req->bd_num); + + buf_len = sizeof(struct hclge_desc) * bd_num; + desc_src = kzalloc(buf_len, GFP_KERNEL); + if (!desc_src) { + dev_err(&hdev->pdev->dev, + "allocate desc for get_m7_stats failed\n"); 
+ return; + } + + desc_tmp = desc_src; + ret = hclge_dbg_cmd_send(hdev, desc_tmp, 0, bd_num, + HCLGE_OPC_M7_STATS_INFO); + if (ret) { + kfree(desc_src); + dev_err(&hdev->pdev->dev, + "get firmware statistics failed, ret=%d\n", ret); + return; + } + + for (i = 0; i < bd_num; i++) { + dev_info(&hdev->pdev->dev, "0x%08x 0x%08x 0x%08x\n", + le32_to_cpu(desc_tmp->data[0]), + le32_to_cpu(desc_tmp->data[1]), + le32_to_cpu(desc_tmp->data[2])); + dev_info(&hdev->pdev->dev, "0x%08x 0x%08x 0x%08x\n", + le32_to_cpu(desc_tmp->data[3]), + le32_to_cpu(desc_tmp->data[4]), + le32_to_cpu(desc_tmp->data[5])); + + desc_tmp++; + } + + kfree(desc_src); +} + /* hclge_dbg_dump_ncl_config: print specified range of NCL_CONFIG file * @hdev: pointer to struct hclge_dev * @cmd_buf: string that contains offset and length */ -static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, + const char *cmd_buf) { #define HCLGE_MAX_NCL_CONFIG_OFFSET 4096 #define HCLGE_MAX_NCL_CONFIG_LENGTH (20 + 24 * 4) @@ -998,13 +1059,13 @@ static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev) while (kfifo_get(&hdev->mac_tnl_log, &stats)) { rem_nsec = do_div(stats.time, HCLGE_BILLION_NANO_SECONDS); - dev_info(&hdev->pdev->dev, "[%07lu.%03lu]status = 0x%x\n", + dev_info(&hdev->pdev->dev, "[%07lu.%03lu] status = 0x%x\n", (unsigned long)stats.time, rem_nsec / 1000, stats.status); } } -int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf) +int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -1029,6 +1090,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf) hclge_dbg_dump_reg_cmd(hdev, cmd_buf); } else if (strncmp(cmd_buf, "dump reset info", 15) == 0) { hclge_dbg_dump_rst_info(hdev); + } else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) { + hclge_dbg_get_m7_stats_info(hdev); } else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) { hclge_dbg_dump_ncl_config(hdev, &cmd_buf[sizeof("dump ncl_config")]); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 4ac80634c984..0a7243825e7b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -87,25 +87,25 @@ static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = { static const struct hclge_hw_error hclge_igu_int[] = { { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { /* sentinel */ } }; static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { { .int_msk = BIT(0), .msg = "rx_buf_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(3), .msg = "tx_buf_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(4), .msg = "tx_buf_underrun", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(5), .msg = 
"rx_stp_buf_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { /* sentinel */ } }; @@ -413,13 +413,13 @@ static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { /* sentinel */ } }; @@ -631,29 +631,20 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { { /* sentinel */ } }; -static enum hnae3_reset_type hclge_log_error(struct device *dev, char *reg, - const struct hclge_hw_error *err, - u32 err_sts) +static void hclge_log_error(struct device *dev, char *reg, + const struct hclge_hw_error *err, + u32 err_sts, unsigned long *reset_requests) { - enum hnae3_reset_type reset_level = HNAE3_FUNC_RESET; - bool need_reset = false; - while (err->msg) { if (err->int_msk & err_sts) { dev_warn(dev, "%s %s found [error status=0x%x]\n", reg, err->msg, err_sts); - if (err->reset_level != HNAE3_NONE_RESET && - err->reset_level >= reset_level) { - reset_level = err->reset_level; - need_reset = true; - } + if (err->reset_level && + err->reset_level != HNAE3_NONE_RESET) + set_bit(err->reset_level, reset_requests); } err++; } - if (need_reset) - return reset_level; - else - return HNAE3_NONE_RESET; } /* hclge_cmd_query_error: read the error information @@ -673,19 +664,19 @@ static int hclge_cmd_query_error(struct hclge_dev *hdev, enum hclge_err_int_type int_type) { struct device *dev = &hdev->pdev->dev; - int num = 1; + int desc_num = 1; int ret; hclge_cmd_setup_basic_desc(&desc[0], cmd, true); if (flag) { desc[0].flag |= cpu_to_le16(flag); hclge_cmd_setup_basic_desc(&desc[1], cmd, true); - num = 2; + desc_num = 2; } if (w_num) desc[0].data[w_num] = cpu_to_le32(int_type); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); if (ret) dev_err(dev, "query error cmd failed (%d)\n", ret); @@ -941,7 +932,7 @@ static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc[2]; - int num = 1; + int desc_num = 1; int ret; /* configure PPU error interrupts */ @@ -960,7 +951,7 @@ static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, desc[1].data[1] = HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK; desc[1].data[2] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK; desc[1].data[3] |= HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK; - num = 2; + desc_num = 2; } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) { hclge_cmd_setup_basic_desc(&desc[0], cmd, false); if (en) @@ -978,7 +969,7 @@ static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, return -EINVAL; } - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); return ret; } @@ -1069,12 +1060,51 @@ static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) return ret; } -#define HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type) \ - do { \ - if (ae_dev->ops->set_default_reset_request) \ - 
ae_dev->ops->set_default_reset_request(ae_dev, \ - reset_type); \ - } while (0) +/* hclge_query_bd_num: query number of buffer descriptors + * @hdev: pointer to struct hclge_dev + * @is_ras: true for ras, false for msix + * @mpf_bd_num: number of main PF interrupt buffer descriptors + * @pf_bd_num: number of not main PF interrupt buffer descriptors + * + * This function querys number of mpf and pf buffer descriptors. + */ +static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, + int *mpf_bd_num, int *pf_bd_num) +{ + struct device *dev = &hdev->pdev->dev; + u32 mpf_min_bd_num, pf_min_bd_num; + enum hclge_opcode_type opcode; + struct hclge_desc desc_bd; + int ret; + + if (is_ras) { + opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM; + mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM; + pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM; + } else { + opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM; + mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM; + pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM; + } + + hclge_cmd_setup_basic_desc(&desc_bd, opcode, true); + ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); + if (ret) { + dev_err(dev, "fail(%d) to query msix int status bd num\n", + ret); + return ret; + } + + *mpf_bd_num = le32_to_cpu(desc_bd.data[0]); + *pf_bd_num = le32_to_cpu(desc_bd.data[1]); + if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) { + dev_err(dev, "Invalid bd num: mpf(%d), pf(%d)\n", + *mpf_bd_num, *pf_bd_num); + return -EINVAL; + } + + return 0; +} /* hclge_handle_mpf_ras_error: handle all main PF RAS errors * @hdev: pointer to struct hclge_dev @@ -1089,7 +1119,6 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, int num) { struct hnae3_ae_dev *ae_dev = hdev->ae_dev; - enum hnae3_reset_type reset_level; struct device *dev = &hdev->pdev->dev; __le32 *desc_data; u32 status; @@ -1098,8 +1127,6 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, /* query all main PF RAS errors */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT, true); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) { dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret); @@ -1108,95 +1135,74 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, /* log HNS common errors */ status = le32_to_cpu(desc[0].data[0]); - if (status) { - reset_level = hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", - &hclge_imp_tcm_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", + &hclge_imp_tcm_ecc_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(desc[0].data[1]); - if (status) { - reset_level = hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", - &hclge_cmdq_nic_mem_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", + &hclge_cmdq_nic_mem_ecc_int[0], status, + &ae_dev->hw_err_reset_req); - if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) { + if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) dev_warn(dev, "imp_rd_data_poison_err found\n"); - HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_NONE_RESET); - } status = le32_to_cpu(desc[0].data[3]); - if (status) { - reset_level = hclge_log_error(dev, "TQP_INT_ECC_INT_STS", - &hclge_tqp_int_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "TQP_INT_ECC_INT_STS", + &hclge_tqp_int_ecc_int[0], status, + &ae_dev->hw_err_reset_req); status = 
le32_to_cpu(desc[0].data[4]); - if (status) { - reset_level = hclge_log_error(dev, "MSIX_ECC_INT_STS", - &hclge_msix_sram_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "MSIX_ECC_INT_STS", + &hclge_msix_sram_ecc_int[0], status, + &ae_dev->hw_err_reset_req); /* log SSU(Storage Switch Unit) errors */ desc_data = (__le32 *)&desc[2]; status = le32_to_cpu(*(desc_data + 2)); - if (status) { - reset_level = hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", - &hclge_ssu_mem_ecc_err_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", + &hclge_ssu_mem_ecc_err_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 3)) & BIT(0); if (status) { dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n", status); - HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); } status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "SSU_COMMON_ERR_INT", - &hclge_ssu_com_err_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_COMMON_ERR_INT", + &hclge_ssu_com_err_int[0], status, + &ae_dev->hw_err_reset_req); /* log IGU(Ingress Unit) errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "IGU_INT_STS", - &hclge_igu_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "IGU_INT_STS", + &hclge_igu_int[0], status, + &ae_dev->hw_err_reset_req); /* log PPP(Programmable Packet Process) errors */ desc_data = (__le32 *)&desc[4]; status = le32_to_cpu(*(desc_data + 1)); - if (status) { - reset_level = - hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", - &hclge_ppp_mpf_abnormal_int_st1[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", + &hclge_ppp_mpf_abnormal_int_st1[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK; - if (status) { - reset_level = - hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", - &hclge_ppp_mpf_abnormal_int_st3[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", + &hclge_ppp_mpf_abnormal_int_st3[0], status, + &ae_dev->hw_err_reset_req); /* log PPU(RCB) errors */ desc_data = (__le32 *)&desc[5]; @@ -1204,66 +1210,53 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, if (status) { dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST1 %s found\n", "rpu_rx_pkt_ecc_mbit_err"); - HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); } status = le32_to_cpu(*(desc_data + 2)); - if (status) { - reset_level = - hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", - &hclge_ppu_mpf_abnormal_int_st2[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", + &hclge_ppu_mpf_abnormal_int_st2[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK; - if (status) { - reset_level = - hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", - &hclge_ppu_mpf_abnormal_int_st3[0], - status); - 
HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", + &hclge_ppu_mpf_abnormal_int_st3[0], status, + &ae_dev->hw_err_reset_req); /* log TM(Traffic Manager) errors */ desc_data = (__le32 *)&desc[6]; status = le32_to_cpu(*desc_data); - if (status) { - reset_level = hclge_log_error(dev, "TM_SCH_RINT", - &hclge_tm_sch_rint[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "TM_SCH_RINT", + &hclge_tm_sch_rint[0], status, + &ae_dev->hw_err_reset_req); /* log QCN(Quantized Congestion Control) errors */ desc_data = (__le32 *)&desc[7]; status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "QCN_FIFO_RINT", - &hclge_qcn_fifo_rint[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "QCN_FIFO_RINT", + &hclge_qcn_fifo_rint[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "QCN_ECC_RINT", - &hclge_qcn_ecc_rint[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "QCN_ECC_RINT", + &hclge_qcn_ecc_rint[0], status, + &ae_dev->hw_err_reset_req); /* log NCSI errors */ desc_data = (__le32 *)&desc[9]; status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "NCSI_ECC_INT_RPT", - &hclge_ncsi_err_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "NCSI_ECC_INT_RPT", + &hclge_ncsi_err_int[0], status, + &ae_dev->hw_err_reset_req); /* clear all main PF RAS errors */ hclge_cmd_reuse_desc(&desc[0], false); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret); @@ -1285,7 +1278,6 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, { struct hnae3_ae_dev *ae_dev = hdev->ae_dev; struct device *dev = &hdev->pdev->dev; - enum hnae3_reset_type reset_level; __le32 *desc_data; u32 status; int ret; @@ -1293,8 +1285,6 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, /* query all PF RAS errors */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT, true); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) { dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret); @@ -1303,53 +1293,41 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, /* log SSU(Storage Switch Unit) errors */ status = le32_to_cpu(desc[0].data[0]); - if (status) { - reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", - &hclge_ssu_port_based_err_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", + &hclge_ssu_port_based_err_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(desc[0].data[1]); - if (status) { - reset_level = hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", - &hclge_ssu_fifo_overflow_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", + &hclge_ssu_fifo_overflow_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(desc[0].data[2]); - if (status) { - reset_level = hclge_log_error(dev, "SSU_ETS_TCG_INT", - &hclge_ssu_ets_tcg_int[0], - status); - 
HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_ETS_TCG_INT", + &hclge_ssu_ets_tcg_int[0], status, + &ae_dev->hw_err_reset_req); /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ desc_data = (__le32 *)&desc[1]; status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", - &hclge_igu_egu_tnl_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", + &hclge_igu_egu_tnl_int[0], status, + &ae_dev->hw_err_reset_req); /* log PPU(RCB) errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; - if (status) { - reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", - &hclge_ppu_pf_abnormal_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", + &hclge_ppu_pf_abnormal_int[0], status, + &ae_dev->hw_err_reset_req); /* clear all PF RAS errors */ hclge_cmd_reuse_desc(&desc[0], false); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret); @@ -1359,24 +1337,16 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) { - struct device *dev = &hdev->pdev->dev; u32 mpf_bd_num, pf_bd_num, bd_num; - struct hclge_desc desc_bd; struct hclge_desc *desc; int ret; /* query the number of registers in the RAS int status */ - hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_RAS_INT_STS_BD_NUM, - true); - ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); - if (ret) { - dev_err(dev, "fail(%d) to query ras int status bd num\n", ret); + ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num); + if (ret) return ret; - } - mpf_bd_num = le32_to_cpu(desc_bd.data[0]); - pf_bd_num = le32_to_cpu(desc_bd.data[1]); - bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc) return -ENOMEM; @@ -1396,6 +1366,66 @@ static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) return ret; } +static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[3]; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); + return ret; + } + + dev_info(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), + le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), + le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); + dev_info(dev, "AXI3: %08X %08X %08X %08X\n", + le32_to_cpu(desc[2].data[0]), 
le32_to_cpu(desc[2].data[1]), + le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); + + return 0; +} + +static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int ret; + + ret = hclge_cmd_query_error(hdev, &desc[0], + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, + HCLGE_CMD_FLAG_NEXT, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); + return ret; + } + + dev_info(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), + le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); + + return 0; +} + static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) { struct device *dev = &hdev->pdev->dev; @@ -1403,8 +1433,7 @@ static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) int ret; /* read overflow error status */ - ret = hclge_cmd_query_error(hdev, &desc[0], - HCLGE_ROCEE_PF_RAS_INT_CMD, + ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD, 0, 0, 0); if (ret) { dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); @@ -1464,19 +1493,27 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) status = le32_to_cpu(desc[0].data[0]); - if (status & HCLGE_ROCEE_RERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI rresp error\n"); - reset_type = HNAE3_FUNC_RESET; - } + if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { + if (status & HCLGE_ROCEE_RERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI rresp error\n"); + + if (status & HCLGE_ROCEE_BERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI bresp error\n"); - if (status & HCLGE_ROCEE_BERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI bresp error\n"); reset_type = HNAE3_FUNC_RESET; + + ret = hclge_log_rocee_axi_error(hdev); + if (ret) + return HNAE3_GLOBAL_RESET; } if (status & HCLGE_ROCEE_ECC_INT_MASK) { dev_warn(dev, "ROCEE RAS 2bit ECC error\n"); reset_type = HNAE3_GLOBAL_RESET; + + ret = hclge_log_rocee_ecc_error(hdev); + if (ret) + return HNAE3_GLOBAL_RESET; } if (status & HCLGE_ROCEE_OVF_INT_MASK) { @@ -1486,7 +1523,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) /* reset everything for now */ return HNAE3_GLOBAL_RESET; } - reset_type = HNAE3_FUNC_RESET; } /* clear error status */ @@ -1501,7 +1537,7 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) return reset_type; } -static int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; @@ -1539,7 +1575,7 @@ static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) reset_type = hclge_log_and_clear_rocee_ras_error(hdev); if (reset_type != HNAE3_NONE_RESET) - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type); + set_bit(reset_type, &ae_dev->hw_err_reset_req); } static const struct hclge_hw_blk hw_blk[] = { @@ -1574,10 +1610,9 @@ static const struct hclge_hw_blk hw_blk[] = { { /* sentinel */ } }; -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) { const struct hclge_hw_blk *module = hw_blk; - struct device *dev = &hdev->pdev->dev; int ret = 0; while (module->name) { @@ -1589,10 +1624,6 @@ int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) 
 		module++;
 	}
 
-	ret = hclge_config_rocee_ras_interrupt(hdev, state);
-	if (ret)
-		dev_err(dev, "fail(%d) to configure ROCEE err int\n", ret);
-
 	return ret;
 }
 
@@ -1602,165 +1633,281 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 	struct device *dev = &hdev->pdev->dev;
 	u32 status;
 
+	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
+		dev_err(dev,
+			"Can't recover - RAS error reported during dev init\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
 	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
+	if (status & HCLGE_RAS_REG_NFE_MASK ||
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
+		ae_dev->hw_err_reset_req = 0;
+	else
+		goto out;
+
 	/* Handling Non-fatal HNS RAS errors */
 	if (status & HCLGE_RAS_REG_NFE_MASK) {
 		dev_warn(dev,
 			 "HNS Non-Fatal RAS error(status=0x%x) identified\n",
 			 status);
 		hclge_handle_all_ras_errors(hdev);
-	} else {
-		if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
-		    hdev->pdev->revision < 0x21) {
-			ae_dev->override_pci_need_reset = 1;
-			return PCI_ERS_RESULT_RECOVERED;
-		}
 	}
 
-	if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
-		dev_warn(dev, "ROCEE uncorrected RAS error identified\n");
+	/* Handling Non-fatal Rocee RAS errors */
+	if (hdev->pdev->revision >= 0x21 &&
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
+		dev_warn(dev, "ROCEE Non-Fatal RAS error identified\n");
 		hclge_handle_rocee_ras_error(ae_dev);
 	}
 
-	if (status & HCLGE_RAS_REG_NFE_MASK ||
-	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
-		ae_dev->override_pci_need_reset = 0;
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		goto out;
+
+	if (ae_dev->hw_err_reset_req)
 		return PCI_ERS_RESULT_NEED_RESET;
-	}
 
-	ae_dev->override_pci_need_reset = 1;
+out:
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
-int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
-			       unsigned long *reset_requests)
+static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
+				     struct hclge_desc *desc, bool is_mpf,
+				     u32 bd_num)
+{
+	if (is_mpf)
+		desc[0].opcode =
+			cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT);
+	else
+		desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT);
+
+	desc[0].flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+
+	return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
+}
+
+/* hclge_query_over_8bd_err_info: query information about over_8bd_nfe_err
+ * @hdev: pointer to struct hclge_dev
+ * @vf_id: Index of the virtual function with error
+ * @q_id: Physical index of the queue with error
+ *
+ * This function gets the specific index of the queue and of the function
+ * which caused over_8bd_nfe_err by using a query command. If vf_id is 0,
+ * it means the error is caused by the PF instead of a VF.
+ */
+static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
+					 u16 *q_id)
+{
+	struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		return ret;
+
+	req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
+	*vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
+	*q_id = le16_to_cpu(req->over_8bd_no_fe_qid);
+
+	return 0;
+}
+
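The helper above only extracts the (vf_id, q_id) pair reported by the DFX query; the handler that follows turns it into a reset decision. That decision is small enough to model standalone; the bit values here are invented, not the driver's:

#include <stdio.h>

#define VF_RESET_BIT   (1UL << 0)
#define FUNC_RESET_BIT (1UL << 1)

/* Decide which reset to request for an over_8bd style error.
 * vf_id == 0 means the PF itself hit the error.
 */
static void handle_over_8bd(unsigned int vf_id, unsigned int num_vport,
			    unsigned long *reset_requests)
{
	if (!vf_id) {
		*reset_requests |= FUNC_RESET_BIT; /* PF caused it: PF reset */
		return;
	}
	if (vf_id >= num_vport)
		return;			/* bogus index, nothing to assert */
	if (*reset_requests)
		return;			/* a higher reset is already queued */
	*reset_requests |= VF_RESET_BIT;	/* reset only that VF */
}

int main(void)
{
	unsigned long req = 0;

	handle_over_8bd(3, 8, &req);
	printf("requests: 0x%lx\n", req);
	return 0;
}
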
+/* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err
+ * @hdev: pointer to struct hclge_dev
+ * @reset_requests: reset level that we need to trigger later
+ *
+ * over_8bd_nfe_err is a special MSI-X error because it may be caused by a
+ * VF; in that case, we need to trigger a VF reset. Otherwise, a PF reset
+ * is needed.
+ */
+static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
+				      unsigned long *reset_requests)
 {
-	struct hclge_mac_tnl_stats mac_tnl_stats;
 	struct device *dev = &hdev->pdev->dev;
-	u32 mpf_bd_num, pf_bd_num, bd_num;
-	enum hnae3_reset_type reset_level;
-	struct hclge_desc desc_bd;
-	struct hclge_desc *desc;
-	__le32 *desc_data;
-	u32 status;
+	u16 vf_id;
+	u16 q_id;
 	int ret;
 
-	/* query the number of bds for the MSIx int status */
-	hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_MSIX_INT_STS_BD_NUM,
-				   true);
-	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+	ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
 	if (ret) {
-		dev_err(dev, "fail(%d) to query msix int status bd num\n",
+		dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n",
 			ret);
-		return ret;
+		return;
 	}
 
-	mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
-	pf_bd_num = le32_to_cpu(desc_bd.data[1]);
-	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+	dev_warn(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%d), queue_id(%d)\n",
+		 vf_id, q_id);
 
-	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
-	if (!desc)
-		goto out;
+	if (vf_id) {
+		if (vf_id >= hdev->num_alloc_vport) {
+			dev_err(dev, "invalid vf id(%d)\n", vf_id);
+			return;
+		}
+
+		/* If another reset whose level is higher than
+		 * HNAE3_VF_FUNC_RESET has already been requested, there is
+		 * no need to trigger a VF reset here.
+		 */
+		if (*reset_requests != 0)
+			return;
+
+		ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
+		if (ret)
+			dev_warn(dev, "inform reset to vf(%d) failed %d!\n",
+				 vf_id, ret);
+	} else {
+		set_bit(HNAE3_FUNC_RESET, reset_requests);
+	}
+}
+
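Both MSI-X handlers that follow receive one descriptor array sized for the larger of the two BD counts and read 32-bit status words out of fixed descriptor slots. A standalone model of that indexing; the descriptor layout is invented, not the firmware's:

#include <stdio.h>
#include <stdint.h>

#define DESC_DATA_WORDS 6

/* Simplified stand-in for struct hclge_desc: a small header plus
 * an array of 32-bit data words.
 */
struct desc {
	uint16_t opcode;
	uint16_t flag;
	uint32_t data[DESC_DATA_WORDS];
};

int main(void)
{
	struct desc desc[6] = { 0 };
	uint32_t status;

	/* pretend the firmware reported an error bit in BD 5, word 2 */
	desc[5].data[2] = 1U << 29;

	/* mirrors "desc_data = (__le32 *)&desc[5]; ... *(desc_data + 2)" */
	status = desc[5].data[2];
	if (status)
		printf("PPU MPF status word: 0x%x\n", status);
	return 0;
}
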
+/* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @mpf_bd_num: number of extended command structures
+ * @reset_requests: record of the reset level that we need
+ *
+ * This function handles all the main PF MSI-X errors in the hardware
+ * registers by using the query command.
+ */
+static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
+				       struct hclge_desc *desc,
+				       int mpf_bd_num,
+				       unsigned long *reset_requests)
+{
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
 
 	/* query all main PF MSIx errors */
 	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
 				   true);
-	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-
 	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
 	if (ret) {
-		dev_err(dev, "query all mpf msix int cmd failed (%d)\n",
-			ret);
-		goto msi_error;
+		dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
+		return ret;
 	}
 
 	/* log MAC errors */
 	desc_data = (__le32 *)&desc[1];
 	status = le32_to_cpu(*desc_data);
-	if (status) {
-		reset_level = hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
-					      &hclge_mac_afifo_tnl_int[0],
-					      status);
-		set_bit(reset_level, reset_requests);
-	}
+	if (status)
+		hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
+				&hclge_mac_afifo_tnl_int[0], status,
+				reset_requests);
 
 	/* log PPU(RCB) MPF errors */
 	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 2)) &
			HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
-	if (status) {
-		reset_level =
-			hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
-					&hclge_ppu_mpf_abnormal_int_st2[0],
-					status);
-		set_bit(reset_level, reset_requests);
-	}
+	if (status)
+		dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x]\n",
+			 status);
 
 	/* clear all main PF MSIx errors */
-	hclge_cmd_reuse_desc(&desc[0], false);
-	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
+	if (ret)
+		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);
 
-	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
-	if (ret) {
-		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n",
-			ret);
-		goto msi_error;
-	}
+	return ret;
+}
+
+/* hclge_handle_pf_msix_error: handle all PF MSI-X errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @pf_bd_num: number of extended command structures
+ * @reset_requests: record of the reset level that we need
+ *
+ * This function handles all the PF MSI-X errors in the hardware registers
+ * by using the query command.
+ */ +static int hclge_handle_pf_msix_error(struct hclge_dev *hdev, + struct hclge_desc *desc, + int pf_bd_num, + unsigned long *reset_requests) +{ + struct device *dev = &hdev->pdev->dev; + __le32 *desc_data; + u32 status; + int ret; /* query all PF MSIx errors */ - memset(desc, 0, bd_num * sizeof(struct hclge_desc)); hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, true); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); if (ret) { - dev_err(dev, "query all pf msix int cmd failed (%d)\n", - ret); - goto msi_error; + dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret); + return ret; } /* log SSU PF errors */ status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; - if (status) { - reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", - &hclge_ssu_port_based_pf_int[0], - status); - set_bit(reset_level, reset_requests); - } + if (status) + hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", + &hclge_ssu_port_based_pf_int[0], + status, reset_requests); /* read and log PPP PF errors */ desc_data = (__le32 *)&desc[2]; status = le32_to_cpu(*desc_data); - if (status) { - reset_level = hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", - &hclge_ppp_pf_abnormal_int[0], - status); - set_bit(reset_level, reset_requests); - } + if (status) + hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", + &hclge_ppp_pf_abnormal_int[0], + status, reset_requests); /* log PPU(RCB) PF errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; - if (status) { - reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", - &hclge_ppu_pf_abnormal_int[0], - status); - set_bit(reset_level, reset_requests); - } + if (status) + hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", + &hclge_ppu_pf_abnormal_int[0], + status, reset_requests); + + status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK; + if (status) + hclge_handle_over_8bd_err(hdev, reset_requests); /* clear all PF MSIx errors */ - hclge_cmd_reuse_desc(&desc[0], false); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); + if (ret) + dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret); - ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); - if (ret) { - dev_err(dev, "clear all pf msix int cmd failed (%d)\n", - ret); + return ret; +} + +static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) +{ + struct hclge_mac_tnl_stats mac_tnl_stats; + struct device *dev = &hdev->pdev->dev; + u32 mpf_bd_num, pf_bd_num, bd_num; + struct hclge_desc *desc; + u32 status; + int ret; + + /* query the number of bds for the MSIx int status */ + ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); + if (ret) + goto out; + + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto out; } + ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num, + reset_requests); + if (ret) + goto msi_error; + + memset(desc, 0, bd_num * sizeof(struct hclge_desc)); + ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests); + if (ret) + goto msi_error; + /* query and clear mac tnl interruptions */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT, true); @@ -1783,7 +1930,6 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, ret = hclge_clear_mac_tnl_int(hdev); if (ret) dev_err(dev, "clear mac tnl 
int failed (%d)\n", ret); - set_bit(HNAE3_NONE_RESET, reset_requests); } msi_error: @@ -1791,3 +1937,70 @@ msi_error: out: return ret; } + +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) +{ + struct device *dev = &hdev->pdev->dev; + + if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { + dev_err(dev, + "Can't handle - MSIx error reported during dev init\n"); + return 0; + } + + return hclge_handle_all_hw_msix_error(hdev, reset_requests); +} + +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) +{ +#define HCLGE_DESC_NO_DATA_LEN 8 + + struct hclge_dev *hdev = ae_dev->priv; + struct device *dev = &hdev->pdev->dev; + u32 mpf_bd_num, pf_bd_num, bd_num; + struct hclge_desc *desc; + u32 status; + int ret; + + ae_dev->hw_err_reset_req = 0; + status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); + + /* query the number of bds for the MSIx int status */ + ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); + if (ret) + return; + + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + return; + + /* Clear HNS hw errors reported through msix */ + memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) - + HCLGE_DESC_NO_DATA_LEN); + ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); + if (ret) { + dev_err(dev, "fail(%d) to clear mpf msix int during init\n", + ret); + goto msi_error; + } + + memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) - + HCLGE_DESC_NO_DATA_LEN); + ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); + if (ret) { + dev_err(dev, "fail(%d) to clear pf msix int during init\n", + ret); + goto msi_error; + } + + /* Handle Non-fatal HNS RAS errors */ + if (status & HCLGE_RAS_REG_NFE_MASK) { + dev_warn(dev, "HNS hw error(RAS) identified during init\n"); + hclge_handle_all_ras_errors(hdev); + } + +msi_error: + kfree(desc); +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 9645590c9294..7ea8bb28a0cb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -6,6 +6,11 @@ #include "hclge_main.h" +#define HCLGE_MPF_RAS_INT_MIN_BD_NUM 10 +#define HCLGE_PF_RAS_INT_MIN_BD_NUM 4 +#define HCLGE_MPF_MSIX_INT_MIN_BD_NUM 10 +#define HCLGE_PF_MSIX_INT_MIN_BD_NUM 4 + #define HCLGE_RAS_PF_OTHER_INT_STS_REG 0x20B00 #define HCLGE_RAS_REG_NFE_MASK 0xFF00 #define HCLGE_RAS_REG_ROCEE_ERR_MASK 0x3000000 @@ -47,9 +52,9 @@ #define HCLGE_NCSI_ERR_INT_TYPE 0x9 #define HCLGE_MAC_COMMON_ERR_INT_EN 0x107FF #define HCLGE_MAC_COMMON_ERR_INT_EN_MASK 0x107FF -#define HCLGE_MAC_TNL_INT_EN GENMASK(7, 0) -#define HCLGE_MAC_TNL_INT_EN_MASK GENMASK(7, 0) -#define HCLGE_MAC_TNL_INT_CLR GENMASK(7, 0) +#define HCLGE_MAC_TNL_INT_EN GENMASK(9, 0) +#define HCLGE_MAC_TNL_INT_EN_MASK GENMASK(9, 0) +#define HCLGE_MAC_TNL_INT_CLR GENMASK(9, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT0_EN GENMASK(31, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK GENMASK(31, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT1_EN GENMASK(31, 0) @@ -81,9 +86,10 @@ #define HCLGE_IGU_EGU_TNL_INT_MASK GENMASK(5, 0) #define HCLGE_PPP_MPF_INT_ST3_MASK GENMASK(5, 0) #define HCLGE_PPU_MPF_INT_ST3_MASK GENMASK(7, 0) -#define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK GENMASK(29, 28) +#define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK BIT(29) #define HCLGE_PPU_PF_INT_RAS_MASK 0x18 -#define HCLGE_PPU_PF_INT_MSIX_MASK 0x27 +#define 
HCLGE_PPU_PF_INT_MSIX_MASK 0x26 +#define HCLGE_PPU_PF_OVER_8BD_ERR_MASK 0x01 #define HCLGE_QCN_FIFO_INT_MASK GENMASK(17, 0) #define HCLGE_QCN_ECC_INT_MASK GENMASK(21, 0) #define HCLGE_NCSI_ECC_INT_MASK GENMASK(1, 0) @@ -94,6 +100,7 @@ #define HCLGE_ROCEE_RAS_CE_INT_EN_MASK 0x1 #define HCLGE_ROCEE_RERR_INT_MASK BIT(0) #define HCLGE_ROCEE_BERR_INT_MASK BIT(1) +#define HCLGE_ROCEE_AXI_ERR_INT_MASK GENMASK(1, 0) #define HCLGE_ROCEE_ECC_INT_MASK BIT(2) #define HCLGE_ROCEE_OVF_INT_MASK BIT(3) #define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000 @@ -119,7 +126,9 @@ struct hclge_hw_error { }; int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en); -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state); +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state); +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en); +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev); pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev); int hclge_handle_hw_msix_error(struct hclge_dev *hdev, unsigned long *reset_requests); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d3b1f8cb1155..3fde5471e1c0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -27,14 +27,26 @@ #define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset)))) #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f)) -#define HCLGE_BUF_SIZE_UNIT 256 +#define HCLGE_BUF_SIZE_UNIT 256U +#define HCLGE_BUF_MUL_BY 2 +#define HCLGE_BUF_DIV_BY 2 +#define NEED_RESERVE_TC_NUM 2 +#define BUF_MAX_PERCENT 100 +#define BUF_RESERVE_PERCENT 90 + +#define HCLGE_RESET_MAX_FAIL_CNT 5 static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps); static int hclge_init_vlan_config(struct hclge_dev *hdev); +static void hclge_sync_vlan_filter(struct hclge_dev *hdev); static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle); static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size, u16 *allocated_size, bool is_alloc); +static void hclge_rfs_filter_expire(struct hclge_dev *hdev); +static void hclge_clear_arfs_rules(struct hnae3_handle *handle); +static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, + unsigned long *addr); static struct hnae3_ae_algo ae_algo; @@ -290,7 +302,7 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = { static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = { { .flags = HCLGE_MAC_MGR_MASK_VLAN_B, - .ethter_type = cpu_to_le16(HCLGE_MAC_ETHERTYPE_LLDP), + .ethter_type = cpu_to_le16(ETH_P_LLDP), .mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)), .mac_addr_lo16 = cpu_to_le16(htons(0x000E)), .i_port_bitmap = 0x1, @@ -437,8 +449,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) queue = handle->kinfo.tqp[i]; tqp = container_of(queue, struct hclge_tqp, q); /* command : HCLGE_OPC_QUERY_IGU_STAT */ - hclge_cmd_setup_basic_desc(&desc[0], - HCLGE_OPC_QUERY_RX_STATUS, + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_RX_STATUS, true); desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); @@ -446,7 +457,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) if (ret) { dev_err(&hdev->pdev->dev, "Query tqp stat fail, status = %d,queue = %d\n", - ret, i); + ret, i); return ret; } tqp->tqp_stats.rcb_rx_ring_pktnum_rcd += @@ -500,6 +511,7 @@ 
static int hclge_tqps_get_sset_count(struct hnae3_handle *handle, int stringset) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; + /* each tqp has TX & RX two queues */ return kinfo->num_tqps * (2); } @@ -528,7 +540,7 @@ static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data) return buff; } -static u64 *hclge_comm_get_stats(void *comm_stats, +static u64 *hclge_comm_get_stats(const void *comm_stats, const struct hclge_comm_stats_str strs[], int size, u64 *data) { @@ -552,8 +564,7 @@ static u8 *hclge_comm_get_strings(u32 stringset, return buff; for (i = 0; i < size; i++) { - snprintf(buff, ETH_GSTRING_LEN, - strs[i].desc); + snprintf(buff, ETH_GSTRING_LEN, "%s", strs[i].desc); buff = buff + ETH_GSTRING_LEN; } @@ -644,8 +655,7 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) return count; } -static void hclge_get_strings(struct hnae3_handle *handle, - u32 stringset, +static void hclge_get_strings(struct hnae3_handle *handle, u32 stringset, u8 *data) { u8 *p = (char *)data; @@ -653,21 +663,17 @@ static void hclge_get_strings(struct hnae3_handle *handle, if (stringset == ETH_SS_STATS) { size = ARRAY_SIZE(g_mac_stats_string); - p = hclge_comm_get_strings(stringset, - g_mac_stats_string, - size, - p); + p = hclge_comm_get_strings(stringset, g_mac_stats_string, + size, p); p = hclge_tqps_get_strings(handle, p); } else if (stringset == ETH_SS_TEST) { if (handle->flags & HNAE3_SUPPORT_APP_LOOPBACK) { - memcpy(p, - hns3_nic_test_strs[HNAE3_LOOP_APP], + memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_APP], ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } if (handle->flags & HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK) { - memcpy(p, - hns3_nic_test_strs[HNAE3_LOOP_SERIAL_SERDES], + memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_SERIAL_SERDES], ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } @@ -678,8 +684,7 @@ static void hclge_get_strings(struct hnae3_handle *handle, p += ETH_GSTRING_LEN; } if (handle->flags & HNAE3_SUPPORT_PHY_LOOPBACK) { - memcpy(p, - hns3_nic_test_strs[HNAE3_LOOP_PHY], + memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_PHY], ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } @@ -692,10 +697,8 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) struct hclge_dev *hdev = vport->back; u64 *p; - p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, - g_mac_stats_string, - ARRAY_SIZE(g_mac_stats_string), - data); + p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string, + ARRAY_SIZE(g_mac_stats_string), data); p = hclge_tqps_get_stats(handle, p); } @@ -726,6 +729,8 @@ static int hclge_parse_func_status(struct hclge_dev *hdev, static int hclge_query_function_status(struct hclge_dev *hdev) { +#define HCLGE_QUERY_MAX_CNT 5 + struct hclge_func_status_cmd *req; struct hclge_desc desc; int timeout = 0; @@ -738,9 +743,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev) ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "query function status failed %d.\n", - ret); - + "query function status failed %d.\n", ret); return ret; } @@ -748,7 +751,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev) if (req->pf_state) break; usleep_range(1000, 2000); - } while (timeout++ < 5); + } while (timeout++ < HCLGE_QUERY_MAX_CNT); ret = hclge_parse_func_status(hdev, req); @@ -800,7 +803,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. 
*/ - hdev->num_msi = hdev->num_roce_msi + + hdev->num_msi = hdev->num_roce_msi + hdev->roce_base_msix_offset; } else { hdev->num_msi = @@ -1058,6 +1061,7 @@ static void hclge_parse_copper_link_mode(struct hclge_dev *hdev, linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); } static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) @@ -1076,7 +1080,7 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) struct hclge_cfg_param_cmd *req; u64 mac_addr_tmp_high; u64 mac_addr_tmp; - int i; + unsigned int i; req = (struct hclge_cfg_param_cmd *)desc[0].data; @@ -1138,7 +1142,8 @@ static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg) { struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM]; struct hclge_cfg_param_cmd *req; - int i, ret; + unsigned int i; + int ret; for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) { u32 offset = 0; @@ -1204,7 +1209,8 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) static int hclge_configure(struct hclge_dev *hdev) { struct hclge_cfg cfg; - int ret, i; + unsigned int i; + int ret; ret = hclge_get_cfg(hdev, &cfg); if (ret) { @@ -1226,8 +1232,10 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->tm_info.hw_pfc_map = 0; hdev->wanted_umv_size = cfg.umv_space; - if (hnae3_dev_fd_supported(hdev)) + if (hnae3_dev_fd_supported(hdev)) { hdev->fd_en = true; + hdev->fd_active_type = HCLGE_FD_RULE_NONE; + } ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed); if (ret) { @@ -1265,8 +1273,8 @@ static int hclge_configure(struct hclge_dev *hdev) return ret; } -static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min, - int tso_mss_max) +static int hclge_config_tso(struct hclge_dev *hdev, unsigned int tso_mss_min, + unsigned int tso_mss_max) { struct hclge_cfg_tso_status_cmd *req; struct hclge_desc desc; @@ -1352,8 +1360,9 @@ static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id, req = (struct hclge_tqp_map_cmd *)desc.data; req->tqp_id = cpu_to_le16(tqp_pid); req->tqp_vf = func_id; - req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B | - 1 << HCLGE_TQP_MAP_EN_B; + req->tqp_flag = 1U << HCLGE_TQP_MAP_EN_B; + if (!is_pf) + req->tqp_flag |= 1U << HCLGE_TQP_MAP_TYPE_B; req->tqp_vid = cpu_to_le16(tqp_vid); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -1457,11 +1466,6 @@ static int hclge_map_tqp(struct hclge_dev *hdev) return 0; } -static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps) -{ - /* this would be initialized later */ -} - static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps) { struct hnae3_handle *nic = &vport->nic; @@ -1472,20 +1476,12 @@ static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps) nic->ae_algo = &ae_algo; nic->numa_node_mask = hdev->numa_node_mask; - if (hdev->ae_dev->dev_type == HNAE3_DEV_KNIC) { - ret = hclge_knic_setup(vport, num_tqps, - hdev->num_tx_desc, hdev->num_rx_desc); - - if (ret) { - dev_err(&hdev->pdev->dev, "knic setup failed %d\n", - ret); - return ret; - } - } else { - hclge_unic_setup(vport, num_tqps); - } + ret = hclge_knic_setup(vport, num_tqps, + hdev->num_tx_desc, hdev->num_rx_desc); + if (ret) + dev_err(&hdev->pdev->dev, "knic setup failed %d\n", ret); - return 0; + return ret; } static int hclge_alloc_vport(struct hclge_dev *hdev) @@ -1591,7 +1587,8 @@ static int hclge_tx_buffer_alloc(struct hclge_dev 
*hdev, static u32 hclge_get_tc_num(struct hclge_dev *hdev) { - int i, cnt = 0; + unsigned int i; + u32 cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) if (hdev->hw_tc_map & BIT(i)) @@ -1604,7 +1601,8 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; - int i, cnt = 0; + unsigned int i; + int cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { priv = &buf_alloc->priv_buf[i]; @@ -1621,7 +1619,8 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; - int i, cnt = 0; + unsigned int i; + int cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { priv = &buf_alloc->priv_buf[i]; @@ -1671,7 +1670,8 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, aligned_mps = roundup(hdev->mps, HCLGE_BUF_SIZE_UNIT); if (hnae3_dev_dcb_supported(hdev)) - shared_buf_min = 2 * aligned_mps + hdev->dv_buf_size; + shared_buf_min = HCLGE_BUF_MUL_BY * aligned_mps + + hdev->dv_buf_size; else shared_buf_min = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF + hdev->dv_buf_size; @@ -1689,7 +1689,8 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, if (hnae3_dev_dcb_supported(hdev)) { buf_alloc->s_buf.self.high = shared_buf - hdev->dv_buf_size; buf_alloc->s_buf.self.low = buf_alloc->s_buf.self.high - - roundup(aligned_mps / 2, HCLGE_BUF_SIZE_UNIT); + - roundup(aligned_mps / HCLGE_BUF_DIV_BY, + HCLGE_BUF_SIZE_UNIT); } else { buf_alloc->s_buf.self.high = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF; @@ -1697,14 +1698,18 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, } if (hnae3_dev_dcb_supported(hdev)) { + hi_thrd = shared_buf - hdev->dv_buf_size; + + if (tc_num <= NEED_RESERVE_TC_NUM) + hi_thrd = hi_thrd * BUF_RESERVE_PERCENT + / BUF_MAX_PERCENT; + if (tc_num) - hi_thrd = (shared_buf - hdev->dv_buf_size) / tc_num; - else - hi_thrd = shared_buf - hdev->dv_buf_size; + hi_thrd = hi_thrd / tc_num; - hi_thrd = max_t(u32, hi_thrd, 2 * aligned_mps); + hi_thrd = max_t(u32, hi_thrd, HCLGE_BUF_MUL_BY * aligned_mps); hi_thrd = rounddown(hi_thrd, HCLGE_BUF_SIZE_UNIT); - lo_thrd = hi_thrd - aligned_mps / 2; + lo_thrd = hi_thrd - aligned_mps / HCLGE_BUF_DIV_BY; } else { hi_thrd = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF; lo_thrd = aligned_mps; @@ -1749,7 +1754,7 @@ static bool hclge_rx_buf_calc_all(struct hclge_dev *hdev, bool max, { u32 rx_all = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc); u32 aligned_mps = round_up(hdev->mps, HCLGE_BUF_SIZE_UNIT); - int i; + unsigned int i; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; @@ -1765,12 +1770,13 @@ static bool hclge_rx_buf_calc_all(struct hclge_dev *hdev, bool max, priv->enable = 1; if (hdev->tm_info.hw_pfc_map & BIT(i)) { - priv->wl.low = max ? aligned_mps : 256; + priv->wl.low = max ? aligned_mps : HCLGE_BUF_SIZE_UNIT; priv->wl.high = roundup(priv->wl.low + aligned_mps, HCLGE_BUF_SIZE_UNIT); } else { priv->wl.low = 0; - priv->wl.high = max ? (aligned_mps * 2) : aligned_mps; + priv->wl.high = max ? 
(aligned_mps * HCLGE_BUF_MUL_BY) : + aligned_mps; } priv->buf_size = priv->wl.high + hdev->dv_buf_size; @@ -1789,9 +1795,10 @@ static bool hclge_drop_nopfc_buf_till_fit(struct hclge_dev *hdev, /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; + unsigned int mask = BIT((unsigned int)i); - if (hdev->hw_tc_map & BIT(i) && - !(hdev->tm_info.hw_pfc_map & BIT(i))) { + if (hdev->hw_tc_map & mask && + !(hdev->tm_info.hw_pfc_map & mask)) { /* Clear the no pfc TC private buffer */ priv->wl.low = 0; priv->wl.high = 0; @@ -1818,9 +1825,10 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev, /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; + unsigned int mask = BIT((unsigned int)i); - if (hdev->hw_tc_map & BIT(i) && - hdev->tm_info.hw_pfc_map & BIT(i)) { + if (hdev->hw_tc_map & mask && + hdev->tm_info.hw_pfc_map & mask) { /* Reduce the number of pfc TC with private buffer */ priv->wl.low = 0; priv->enable = 0; @@ -1837,6 +1845,55 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev, return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all); } +static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) +{ +#define COMPENSATE_BUFFER 0x3C00 +#define COMPENSATE_HALF_MPS_NUM 5 +#define PRIV_WL_GAP 0x1800 + + u32 rx_priv = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc); + u32 tc_num = hclge_get_tc_num(hdev); + u32 half_mps = hdev->mps >> 1; + u32 min_rx_priv; + unsigned int i; + + if (tc_num) + rx_priv = rx_priv / tc_num; + + if (tc_num <= NEED_RESERVE_TC_NUM) + rx_priv = rx_priv * BUF_RESERVE_PERCENT / BUF_MAX_PERCENT; + + min_rx_priv = hdev->dv_buf_size + COMPENSATE_BUFFER + + COMPENSATE_HALF_MPS_NUM * half_mps; + min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT); + rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT); + + if (rx_priv < min_rx_priv) + return false; + + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; + + priv->enable = 0; + priv->wl.low = 0; + priv->wl.high = 0; + priv->buf_size = 0; + + if (!(hdev->hw_tc_map & BIT(i))) + continue; + + priv->enable = 1; + priv->buf_size = rx_priv; + priv->wl.high = rx_priv - hdev->dv_buf_size; + priv->wl.low = priv->wl.high - PRIV_WL_GAP; + } + + buf_alloc->s_buf.buf_size = 0; + + return true; +} + /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev * @buf_alloc: pointer to buffer calculation data @@ -1856,6 +1913,9 @@ static int hclge_rx_buffer_calc(struct hclge_dev *hdev, return 0; } + if (hclge_only_alloc_priv_buff(hdev, buf_alloc)) + return 0; + if (hclge_rx_buf_calc_all(hdev, true, buf_alloc)) return 0; @@ -2153,7 +2213,6 @@ static int hclge_init_msi(struct hclge_dev *hdev) static u8 hclge_check_speed_dup(u8 duplex, int speed) { - if (!(speed == HCLGE_MAC_SPEED_10M || speed == HCLGE_MAC_SPEED_100M)) duplex = HCLGE_MAC_FULL; @@ -2171,7 +2230,8 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false); - hnae3_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, !!duplex); + if (duplex) + hnae3_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, 1); switch (speed) { case HCLGE_MAC_SPEED_10M: @@ -2261,7 +2321,8 @@ static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) hclge_cmd_setup_basic_desc(&desc, 
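
The private-buffer sizing in hclge_only_alloc_priv_buff() above is plain integer arithmetic, so it can be sanity-checked outside the driver. A worked standalone sketch; the percentage and compensation constants mirror the hunk, while the device sizes fed into main() are invented:

#include <stdio.h>
#include <stdint.h>

#define BUF_SIZE_UNIT           256u
#define BUF_MAX_PERCENT         100u
#define BUF_RESERVE_PERCENT     90u
#define NEED_RESERVE_TC_NUM     2u
#define COMPENSATE_BUFFER       0x3C00u
#define COMPENSATE_HALF_MPS_NUM 5u

static uint32_t rnd_dn(uint32_t v) { return v / BUF_SIZE_UNIT * BUF_SIZE_UNIT; }
static uint32_t rnd_up(uint32_t v) { return rnd_dn(v + BUF_SIZE_UNIT - 1); }

int main(void)
{
	uint32_t pkt_buf = 512 * 1024, tx_alloced = 128 * 1024; /* invented */
	uint32_t dv_buf = 0x8000, mps = 1536, tc_num = 2;       /* invented */
	uint32_t rx_priv = pkt_buf - tx_alloced;
	uint32_t min_rx_priv;

	if (tc_num)
		rx_priv /= tc_num;		/* split evenly per TC */
	if (tc_num <= NEED_RESERVE_TC_NUM)	/* keep 10% headroom */
		rx_priv = rx_priv * BUF_RESERVE_PERCENT / BUF_MAX_PERCENT;

	min_rx_priv = dv_buf + COMPENSATE_BUFFER +
		      COMPENSATE_HALF_MPS_NUM * (mps >> 1);
	min_rx_priv = rnd_up(min_rx_priv);
	rx_priv = rnd_dn(rx_priv);

	printf("per-TC rx_priv=%u, min=%u, ok=%d\n",
	       rx_priv, min_rx_priv, rx_priv >= min_rx_priv);
	return 0;
}
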
HCLGE_OPC_CONFIG_AN_MODE, false); req = (struct hclge_config_auto_neg_cmd *)desc.data; - hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable); + if (enable) + hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, 1U); req->cfg_an_cmd_flag = cpu_to_le32(flag); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2316,6 +2377,17 @@ static int hclge_restart_autoneg(struct hnae3_handle *handle) return hclge_notify_client(hdev, HNAE3_UP_CLIENT); } +static int hclge_halt_autoneg(struct hnae3_handle *handle, bool halt) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + if (hdev->hw.mac.support_autoneg && hdev->hw.mac.autoneg) + return hclge_set_autoneg_en(hdev, !halt); + + return 0; +} + static int hclge_set_fec_hw(struct hclge_dev *hdev, u32 fec_mode) { struct hclge_config_fec_cmd *req; @@ -2389,6 +2461,15 @@ static int hclge_mac_init(struct hclge_dev *hdev) return ret; } + if (hdev->hw.mac.support_autoneg) { + ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); + if (ret) { + dev_err(&hdev->pdev->dev, + "Config mac autoneg fail ret=%d\n", ret); + return ret; + } + } + mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { @@ -2423,7 +2504,8 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) static void hclge_reset_task_schedule(struct hclge_dev *hdev) { - if (!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) schedule_work(&hdev->rst_service_task); } @@ -2458,7 +2540,7 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev) static int hclge_get_mac_phy_link(struct hclge_dev *hdev) { - int mac_state; + unsigned int mac_state; int link_stat; if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) @@ -2508,6 +2590,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev) static void hclge_update_port_capability(struct hclge_mac *mac) { + /* update fec ability by speed */ + hclge_convert_setting_fec(mac); + /* firmware can not identify back plane type, the media type * read from configuration can help deal it */ @@ -2529,7 +2614,7 @@ static void hclge_update_port_capability(struct hclge_mac *mac) static int hclge_get_sfp_speed(struct hclge_dev *hdev, u32 *speed) { - struct hclge_sfp_info_cmd *resp = NULL; + struct hclge_sfp_info_cmd *resp; struct hclge_desc desc; int ret; @@ -2580,6 +2665,11 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac) mac->speed_ability = le32_to_cpu(resp->speed_ability); mac->autoneg = resp->autoneg; mac->support_autoneg = resp->autoneg_ability; + mac->speed_type = QUERY_ACTIVE_SPEED; + if (!resp->active_fec) + mac->fec_mode = 0; + else + mac->fec_mode = BIT(resp->active_fec); } else { mac->speed_type = QUERY_SFP_SPEED; } @@ -2645,6 +2735,7 @@ static void hclge_service_timer(struct timer_list *t) mod_timer(&hdev->service_timer, jiffies + HZ); hdev->hw_stats.stats_timer++; + hdev->fd_arfs_expire_timer++; hclge_task_schedule(hdev); } @@ -2693,19 +2784,11 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) return HCLGE_VECTOR0_EVENT_RST; } - if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_src_reg) { - dev_info(&hdev->pdev->dev, "core reset interrupt\n"); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - set_bit(HNAE3_CORE_RESET, &hdev->reset_pending); - *clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B); - hdev->rst_stats.core_rst_cnt++; - return HCLGE_VECTOR0_EVENT_RST; - } - /* check for vector0 msix event source */ if 
(msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { - dev_dbg(&hdev->pdev->dev, "received event 0x%x\n", - msix_src_reg); + dev_info(&hdev->pdev->dev, "received event 0x%x\n", + msix_src_reg); + *clearval = msix_src_reg; return HCLGE_VECTOR0_EVENT_ERR; } @@ -2717,8 +2800,11 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) } /* print other vector0 event source */ - dev_dbg(&hdev->pdev->dev, "cmdq_src_reg:0x%x, msix_src_reg:0x%x\n", - cmdq_src_reg, msix_src_reg); + dev_info(&hdev->pdev->dev, + "CMDQ INT status:0x%x, other INT status:0x%x\n", + cmdq_src_reg, msix_src_reg); + *clearval = msix_src_reg; + return HCLGE_VECTOR0_EVENT_OTHER; } @@ -2754,8 +2840,8 @@ static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable) static irqreturn_t hclge_misc_irq_handle(int irq, void *data) { struct hclge_dev *hdev = data; + u32 clearval = 0; u32 event_cause; - u32 clearval; hclge_enable_vector(&hdev->misc_vector, false); event_cause = hclge_check_event_cause(hdev, &clearval); @@ -2797,7 +2883,8 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data) } /* clear the source of interrupt if it is not cause by reset */ - if (event_cause == HCLGE_VECTOR0_EVENT_MBX) { + if (!clearval || + event_cause == HCLGE_VECTOR0_EVENT_MBX) { hclge_clear_event_cause(hdev, event_cause, clearval); hclge_enable_vector(&hdev->misc_vector, true); } @@ -2861,6 +2948,9 @@ int hclge_notify_client(struct hclge_dev *hdev, struct hnae3_client *client = hdev->nic_client; u16 i; + if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client) + return 0; + if (!client->ops->reset_notify) return -EOPNOTSUPP; @@ -2886,7 +2976,7 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev, int ret = 0; u16 i; - if (!client) + if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) return 0; if (!client->ops->reset_notify) @@ -2923,10 +3013,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) reg = HCLGE_GLOBAL_RESET_REG; reg_bit = HCLGE_GLOBAL_RESET_BIT; break; - case HNAE3_CORE_RESET: - reg = HCLGE_GLOBAL_RESET_REG; - reg_bit = HCLGE_CORE_RESET_BIT; - break; case HNAE3_FUNC_RESET: reg = HCLGE_FUN_RST_ING; reg_bit = HCLGE_FUN_RST_ING_B; @@ -3058,12 +3144,6 @@ static void hclge_do_reset(struct hclge_dev *hdev) hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); dev_info(&pdev->dev, "Global Reset requested\n"); break; - case HNAE3_CORE_RESET: - val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG); - hnae3_set_bit(val, HCLGE_CORE_RESET_BIT, 1); - hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); - dev_info(&pdev->dev, "Core Reset requested\n"); - break; case HNAE3_FUNC_RESET: dev_info(&pdev->dev, "PF Reset requested\n"); /* schedule again to check later */ @@ -3083,10 +3163,11 @@ static void hclge_do_reset(struct hclge_dev *hdev) } } -static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev, +static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, unsigned long *addr) { enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; + struct hclge_dev *hdev = ae_dev->priv; /* first, resolve any unknown reset type to the known type(s) */ if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { @@ -3110,16 +3191,10 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev, rst_level = HNAE3_IMP_RESET; clear_bit(HNAE3_IMP_RESET, addr); clear_bit(HNAE3_GLOBAL_RESET, addr); - clear_bit(HNAE3_CORE_RESET, addr); clear_bit(HNAE3_FUNC_RESET, addr); } else if (test_bit(HNAE3_GLOBAL_RESET, addr)) { rst_level = HNAE3_GLOBAL_RESET; 
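
The reset-level resolution being reworked in this hunk follows a strict priority: IMP covers GLOBAL, which covers FUNC, so resolving a higher level also clears the lower pending bits, since the bigger reset implicitly performs the smaller ones. A standalone sketch of that policy with illustrative enum values:

#include <stdio.h>

enum reset { NONE_RESET, FUNC_RESET, GLOBAL_RESET, IMP_RESET };

/* Resolve the highest-priority pending reset from a bitmap and clear
 * every bit that the chosen reset already covers.
 */
static enum reset get_reset_level(unsigned long *addr)
{
	if (*addr & (1UL << IMP_RESET)) {
		*addr = 0;			/* covers everything below */
		return IMP_RESET;
	}
	if (*addr & (1UL << GLOBAL_RESET)) {
		*addr &= ~((1UL << GLOBAL_RESET) | (1UL << FUNC_RESET));
		return GLOBAL_RESET;
	}
	if (*addr & (1UL << FUNC_RESET)) {
		*addr &= ~(1UL << FUNC_RESET);
		return FUNC_RESET;
	}
	return NONE_RESET;
}

int main(void)
{
	unsigned long pending = (1UL << FUNC_RESET) | (1UL << GLOBAL_RESET);

	printf("level=%d, left=0x%lx\n", get_reset_level(&pending), pending);
	return 0;
}
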
clear_bit(HNAE3_GLOBAL_RESET, addr); - clear_bit(HNAE3_CORE_RESET, addr); - clear_bit(HNAE3_FUNC_RESET, addr); - } else if (test_bit(HNAE3_CORE_RESET, addr)) { - rst_level = HNAE3_CORE_RESET; - clear_bit(HNAE3_CORE_RESET, addr); clear_bit(HNAE3_FUNC_RESET, addr); } else if (test_bit(HNAE3_FUNC_RESET, addr)) { rst_level = HNAE3_FUNC_RESET; @@ -3147,9 +3222,6 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev) case HNAE3_GLOBAL_RESET: clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B); break; - case HNAE3_CORE_RESET: - clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B); - break; default: break; } @@ -3180,6 +3252,8 @@ static int hclge_reset_prepare_down(struct hclge_dev *hdev) static int hclge_reset_prepare_wait(struct hclge_dev *hdev) { +#define HCLGE_RESET_SYNC_TIME 100 + u32 reg_val; int ret = 0; @@ -3188,7 +3262,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) /* There is no mechanism for PF to know if VF has stopped IO * for now, just wait 100 ms for VF to stop IO */ - msleep(100); + msleep(HCLGE_RESET_SYNC_TIME); ret = hclge_func_reset_cmd(hdev, 0); if (ret) { dev_err(&hdev->pdev->dev, @@ -3208,7 +3282,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) /* There is no mechanism for PF to know if VF has stopped IO * for now, just wait 100 ms for VF to stop IO */ - msleep(100); + msleep(HCLGE_RESET_SYNC_TIME); set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); set_bit(HNAE3_FLR_DOWN, &hdev->flr_state); hdev->rst_stats.flr_rst_cnt++; @@ -3222,6 +3296,10 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) break; } + /* inform hardware that preparatory work is done */ + msleep(HCLGE_RESET_SYNC_TIME); + hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG, + HCLGE_NIC_CMQ_ENABLE); dev_info(&hdev->pdev->dev, "prepare wait ok\n"); return ret; @@ -3230,7 +3308,6 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) static bool hclge_reset_err_handle(struct hclge_dev *hdev, bool is_timeout) { #define MAX_RESET_FAIL_CNT 5 -#define RESET_UPGRADE_DELAY_SEC 10 if (hdev->reset_pending) { dev_info(&hdev->pdev->dev, "Reset pending %lu\n", @@ -3254,8 +3331,9 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev, bool is_timeout) dev_info(&hdev->pdev->dev, "Upgrade reset level\n"); hclge_clear_reset_cause(hdev); + set_bit(HNAE3_GLOBAL_RESET, &hdev->default_reset_request); mod_timer(&hdev->reset_timer, - jiffies + RESET_UPGRADE_DELAY_SEC * HZ); + jiffies + HCLGE_RESET_INTERVAL); return false; } @@ -3282,6 +3360,25 @@ static int hclge_reset_prepare_up(struct hclge_dev *hdev) return ret; } +static int hclge_reset_stack(struct hclge_dev *hdev) +{ + int ret; + + ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); + if (ret) + return ret; + + ret = hclge_reset_ae_dev(hdev->ae_dev); + if (ret) + return ret; + + ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT); + if (ret) + return ret; + + return hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT); +} + static void hclge_reset(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); @@ -3325,19 +3422,8 @@ static void hclge_reset(struct hclge_dev *hdev) goto err_reset; rtnl_lock(); - ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); - if (ret) - goto err_reset_lock; - ret = hclge_reset_ae_dev(hdev->ae_dev); - if (ret) - goto err_reset_lock; - - ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT); - if (ret) - goto err_reset_lock; - - ret = hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT); + ret = hclge_reset_stack(hdev); if (ret) goto err_reset_lock; @@ -3347,16 +3433,23 @@ static 
void hclge_reset(struct hclge_dev *hdev) if (ret) goto err_reset_lock; + rtnl_unlock(); + + ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT); + /* ignore RoCE notify error if it fails HCLGE_RESET_MAX_FAIL_CNT - 1 + * times + */ + if (ret && hdev->reset_fail_cnt < HCLGE_RESET_MAX_FAIL_CNT - 1) + goto err_reset; + + rtnl_lock(); + ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT); if (ret) goto err_reset_lock; rtnl_unlock(); - ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT); - if (ret) - goto err_reset; - ret = hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT); if (ret) goto err_reset; @@ -3399,11 +3492,12 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) if (!handle) handle = &hdev->vport[0].nic; - if (time_before(jiffies, (hdev->last_reset_time + 3 * HZ))) + if (time_before(jiffies, (hdev->last_reset_time + + HCLGE_RESET_INTERVAL))) return; else if (hdev->default_reset_request) hdev->reset_level = - hclge_get_reset_level(hdev, + hclge_get_reset_level(ae_dev, &hdev->default_reset_request); else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) hdev->reset_level = HNAE3_FUNC_RESET; @@ -3432,13 +3526,14 @@ static void hclge_reset_timer(struct timer_list *t) struct hclge_dev *hdev = from_timer(hdev, t, reset_timer); dev_info(&hdev->pdev->dev, - "triggering global reset in reset timer\n"); - set_bit(HNAE3_GLOBAL_RESET, &hdev->default_reset_request); + "triggering reset in reset timer\n"); hclge_reset_event(hdev->pdev, NULL); } static void hclge_reset_subtask(struct hclge_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + /* check if there is any ongoing reset in the hardware. This status can * be checked from reset_pending. If there is then, we need to wait for * hardware to complete reset. @@ -3449,12 +3544,12 @@ static void hclge_reset_subtask(struct hclge_dev *hdev) * now. 
*/ hdev->last_reset_time = jiffies; - hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_pending); + hdev->reset_type = hclge_get_reset_level(ae_dev, &hdev->reset_pending); if (hdev->reset_type != HNAE3_NONE_RESET) hclge_reset(hdev); /* check if we got any *new* reset requests to be honored */ - hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_request); + hdev->reset_type = hclge_get_reset_level(ae_dev, &hdev->reset_request); if (hdev->reset_type != HNAE3_NONE_RESET) hclge_do_reset(hdev); @@ -3521,6 +3616,11 @@ static void hclge_service_task(struct work_struct *work) hclge_update_port_info(hdev); hclge_update_link_status(hdev); hclge_update_vport_alive(hdev); + hclge_sync_vlan_filter(hdev); + if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) { + hclge_rfs_filter_expire(hdev); + hdev->fd_arfs_expire_timer = 0; + } hclge_service_complete(hdev); } @@ -3614,29 +3714,28 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev, const u8 hfunc, const u8 *key) { struct hclge_rss_config_cmd *req; + unsigned int key_offset = 0; struct hclge_desc desc; - int key_offset; + int key_counts; int key_size; int ret; + key_counts = HCLGE_RSS_KEY_SIZE; req = (struct hclge_rss_config_cmd *)desc.data; - for (key_offset = 0; key_offset < 3; key_offset++) { + while (key_counts) { hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, false); req->hash_config |= (hfunc & HCLGE_RSS_HASH_ALGO_MASK); req->hash_config |= (key_offset << HCLGE_RSS_HASH_KEY_OFFSET_B); - if (key_offset == 2) - key_size = - HCLGE_RSS_KEY_SIZE - HCLGE_RSS_HASH_KEY_NUM * 2; - else - key_size = HCLGE_RSS_HASH_KEY_NUM; - + key_size = min(HCLGE_RSS_HASH_KEY_NUM, key_counts); memcpy(req->hash_key, key + key_offset * HCLGE_RSS_HASH_KEY_NUM, key_size); + key_counts -= key_size; + key_offset++; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -3995,13 +4094,14 @@ int hclge_rss_init_hw(struct hclge_dev *hdev) struct hclge_vport *vport = hdev->vport; u8 *rss_indir = vport[0].rss_indirection_tbl; u16 rss_size = vport[0].alloc_rss_size; + u16 tc_offset[HCLGE_MAX_TC_NUM] = {0}; + u16 tc_size[HCLGE_MAX_TC_NUM] = {0}; u8 *key = vport[0].rss_hash_key; u8 hfunc = vport[0].rss_algo; - u16 tc_offset[HCLGE_MAX_TC_NUM]; u16 tc_valid[HCLGE_MAX_TC_NUM]; - u16 tc_size[HCLGE_MAX_TC_NUM]; u16 roundup_size; - int i, ret; + unsigned int i; + int ret; ret = hclge_set_rss_indir_table(hdev, rss_indir); if (ret) @@ -4156,8 +4256,7 @@ int hclge_bind_ring_with_vector(struct hclge_vport *vport, return 0; } -static int hclge_map_ring_to_vector(struct hnae3_handle *handle, - int vector, +static int hclge_map_ring_to_vector(struct hnae3_handle *handle, int vector, struct hnae3_ring_chain_node *ring_chain) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -4174,8 +4273,7 @@ static int hclge_map_ring_to_vector(struct hnae3_handle *handle, return hclge_bind_ring_with_vector(vport, vector_id, true, ring_chain); } -static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, - int vector, +static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, int vector, struct hnae3_ring_chain_node *ring_chain) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -4196,8 +4294,7 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, if (ret) dev_err(&handle->pdev->dev, "Unmap ring from vector fail. 
vectorid=%d, ret =%d\n", - vector_id, - ret); + vector_id, ret); return ret; } @@ -4503,19 +4600,19 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, case 0: return false; case BIT(INNER_DST_MAC): - for (i = 0; i < 6; i++) { - calc_x(key_x[5 - i], rule->tuples.dst_mac[i], + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], rule->tuples_mask.dst_mac[i]); - calc_y(key_y[5 - i], rule->tuples.dst_mac[i], + calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], rule->tuples_mask.dst_mac[i]); } return true; case BIT(INNER_SRC_MAC): - for (i = 0; i < 6; i++) { - calc_x(key_x[5 - i], rule->tuples.src_mac[i], + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], rule->tuples.src_mac[i]); - calc_y(key_y[5 - i], rule->tuples.src_mac[i], + calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], rule->tuples.src_mac[i]); } @@ -4551,19 +4648,19 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, return true; case BIT(INNER_SRC_IP): - calc_x(tmp_x_l, rule->tuples.src_ip[3], - rule->tuples_mask.src_ip[3]); - calc_y(tmp_y_l, rule->tuples.src_ip[3], - rule->tuples_mask.src_ip[3]); + calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX], + rule->tuples_mask.src_ip[IPV4_INDEX]); + calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX], + rule->tuples_mask.src_ip[IPV4_INDEX]); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l); return true; case BIT(INNER_DST_IP): - calc_x(tmp_x_l, rule->tuples.dst_ip[3], - rule->tuples_mask.dst_ip[3]); - calc_y(tmp_y_l, rule->tuples.dst_ip[3], - rule->tuples_mask.dst_ip[3]); + calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX], + rule->tuples_mask.dst_ip[IPV4_INDEX]); + calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX], + rule->tuples_mask.dst_ip[IPV4_INDEX]); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l); @@ -4617,7 +4714,7 @@ static void hclge_fd_convert_meta_data(struct hclge_fd_key_cfg *key_cfg, { u32 tuple_bit, meta_data = 0, tmp_x, tmp_y, port_number; u8 cur_pos = 0, tuple_size, shift_bits; - int i; + unsigned int i; for (i = 0; i < MAX_META_DATA; i++) { tuple_size = meta_data_key_info[i].key_length; @@ -4659,7 +4756,8 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage, struct hclge_fd_key_cfg *key_cfg = &hdev->fd_cfg.key_cfg[stage]; u8 key_x[MAX_KEY_BYTES], key_y[MAX_KEY_BYTES]; u8 *cur_key_x, *cur_key_y; - int i, ret, tuple_size; + unsigned int i; + int ret, tuple_size; u8 meta_data_region; memset(key_x, 0, sizeof(key_x)); @@ -4812,6 +4910,7 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, *unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | BIT(INNER_IP_TOS); + /* check whether src/dst ip address used */ if (!tcp_ip6_spec->ip6src[0] && !tcp_ip6_spec->ip6src[1] && !tcp_ip6_spec->ip6src[2] && !tcp_ip6_spec->ip6src[3]) *unused |= BIT(INNER_SRC_IP); @@ -4836,6 +4935,7 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); + /* check whether src/dst ip address used */ if (!usr_ip6_spec->ip6src[0] && !usr_ip6_spec->ip6src[1] && !usr_ip6_spec->ip6src[2] && !usr_ip6_spec->ip6src[3]) *unused |= BIT(INNER_SRC_IP); @@ -4906,14 +5006,18 @@ static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location) struct hclge_fd_rule *rule = NULL; struct hlist_node *node2; + spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { if (rule->location >= location) break; } 
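
hclge_fd_rule_exist() above walks a list that is kept sorted by location, so it can stop at the first node whose location is >= the target; the spin_lock_bh() is presumably chosen because aRFS inserts rules from softirq context. The search logic itself, modeled standalone:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct fd_rule {
	unsigned short location;
	struct fd_rule *next;	/* list kept sorted by location */
};

/* Stop at the first rule whose location is >= the target; because the
 * list is sorted, anything later cannot match either.
 */
static bool rule_exists(const struct fd_rule *head, unsigned short location)
{
	const struct fd_rule *r;

	for (r = head; r; r = r->next)
		if (r->location >= location)
			break;
	return r && r->location == location;
}

int main(void)
{
	struct fd_rule c = { 7, NULL }, b = { 4, &c }, a = { 1, &b };

	printf("%d %d\n", rule_exists(&a, 4), rule_exists(&a, 5));
	return 0;
}
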
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
 	return rule && rule->location == location;
 }
 
+/* make sure to be called while holding fd_rule_lock */
 static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 				     struct hclge_fd_rule *new_rule,
 				     u16 location,
@@ -4937,9 +5041,13 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 		kfree(rule);
 		hdev->hclge_fd_rule_num--;
 
-		if (!is_add)
-			return 0;
+		if (!is_add) {
+			if (!hdev->hclge_fd_rule_num)
+				hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+			clear_bit(location, hdev->fd_bmap);
 
+			return 0;
+		}
 	} else if (!is_add) {
 		dev_err(&hdev->pdev->dev,
 			"delete fail, rule %d is inexistent\n",
@@ -4954,7 +5062,9 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 	else
 		hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
 
+	set_bit(location, hdev->fd_bmap);
 	hdev->hclge_fd_rule_num++;
+	hdev->fd_active_type = new_rule->rule_type;
 
 	return 0;
 }
@@ -4969,14 +5079,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	case SCTP_V4_FLOW:
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
-		rule->tuples.src_ip[3] =
+		rule->tuples.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[3] =
+		rule->tuples_mask.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
-		rule->tuples.dst_ip[3] =
+		rule->tuples.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[3] =
+		rule->tuples_mask.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
 
 		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
@@ -4995,14 +5105,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 		break;
 	case IP_USER_FLOW:
-		rule->tuples.src_ip[3] =
+		rule->tuples.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[3] =
+		rule->tuples_mask.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
-		rule->tuples.dst_ip[3] =
+		rule->tuples.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[3] =
+		rule->tuples_mask.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
 
 		rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
@@ -5019,14 +5129,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	case TCP_V6_FLOW:
 	case UDP_V6_FLOW:
 		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.tcp_ip6_spec.ip6src, 4);
+				  fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.tcp_ip6_spec.ip6src, 4);
+				  fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.tcp_ip6_spec.ip6dst, 4);
+				  fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.tcp_ip6_spec.ip6dst, 4);
+				  fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
 
 		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
 		rule->tuples_mask.src_port =
@@ -5042,14 +5152,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 		break;
 	case IPV6_USER_FLOW:
 		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.usr_ip6_spec.ip6src, 4);
+				  fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.usr_ip6_spec.ip6src, 4);
+				  fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.usr_ip6_spec.ip6dst, 4);
+				  fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.usr_ip6_spec.ip6dst, 4);
+				  fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
 
 		rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
 		rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
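
The rule-list update in the hunks above now keeps three pieces of state in sync: the fd_bmap location bitmap, the rule counter, and the active rule type, which falls back to "none" once the last rule is deleted. A standalone sketch of that bookkeeping; the names and types are illustrative:

#include <stdio.h>
#include <stdbool.h>

enum fd_rule_type { FD_RULE_NONE, FD_EP_ACTIVE, FD_ARFS_ACTIVE };

struct fd_state {
	unsigned long bmap;		/* one bit per rule location */
	unsigned int num;		/* number of installed rules */
	enum fd_rule_type active_type;
};

static void rule_update(struct fd_state *s, unsigned int loc, bool add,
			enum fd_rule_type type)
{
	if (add) {
		s->bmap |= 1UL << loc;
		s->num++;
		s->active_type = type;	/* the last inserted rule wins */
	} else {
		s->bmap &= ~(1UL << loc);
		if (s->num && !--s->num)
			s->active_type = FD_RULE_NONE;
	}
}

int main(void)
{
	struct fd_state s = { 0, 0, FD_RULE_NONE };

	rule_update(&s, 3, true, FD_EP_ACTIVE);
	rule_update(&s, 3, false, FD_RULE_NONE);
	printf("num=%u type=%d bmap=0x%lx\n", s.num, s.active_type, s.bmap);
	return 0;
}
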
int hclge_fd_get_tuple(struct hclge_dev *hdev, return 0; } +/* make sure fd_rule_lock is held when calling this function */ +static int hclge_fd_config_rule(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + int ret; + + if (!rule) { + dev_err(&hdev->pdev->dev, + "The flow director rule is NULL\n"); + return -EINVAL; + } + + /* it will never fail here, so there is no need to check the + * return value + */ + hclge_fd_update_rule_list(hdev, rule, rule->location, true); + + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + goto clear_rule; + + ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + goto clear_rule; + + return 0; + +clear_rule: + hclge_fd_update_rule_list(hdev, rule, rule->location, false); + return ret; +} + static int hclge_add_fd_entry(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd) { @@ -5174,8 +5314,10 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, return -ENOMEM; ret = hclge_fd_get_tuple(hdev, fs, rule); - if (ret) - goto free_rule; + if (ret) { + kfree(rule); + return ret; + } rule->flow_type = fs->flow_type; @@ -5184,24 +5326,19 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, rule->vf_id = dst_vport_id; rule->queue_id = q_index; rule->action = action; + rule->rule_type = HCLGE_FD_EP_ACTIVE; - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) - goto free_rule; + /* to avoid rule conflicts, we need to clear all arfs rules + * when the user configures a rule via ethtool + */ + hclge_clear_arfs_rules(handle); - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) - goto free_rule; + spin_lock_bh(&hdev->fd_rule_lock); + ret = hclge_fd_config_rule(hdev, rule); - ret = hclge_fd_update_rule_list(hdev, rule, fs->location, true); - if (ret) - goto free_rule; + spin_unlock_bh(&hdev->fd_rule_lock); return ret; - -free_rule: - kfree(rule); - return ret; } static int hclge_del_fd_entry(struct hnae3_handle *handle, @@ -5222,18 +5359,21 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle, if (!hclge_fd_rule_exist(hdev, fs->location)) { dev_err(&hdev->pdev->dev, - "Delete fail, rule %d is inexistent\n", - fs->location); + "Delete fail, rule %d is inexistent\n", fs->location); return -ENOENT; } - ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - fs->location, NULL, false); + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location, + NULL, false); if (ret) return ret; - return hclge_fd_update_rule_list(hdev, NULL, fs->location, - false); + spin_lock_bh(&hdev->fd_rule_lock); + ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false); + + spin_unlock_bh(&hdev->fd_rule_lock); + + return ret; } static void hclge_del_all_fd_entries(struct hnae3_handle *handle, @@ -5243,25 +5383,30 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; + u16 location; if (!hnae3_dev_fd_supported(hdev)) return; + spin_lock_bh(&hdev->fd_rule_lock); + for_each_set_bit(location, hdev->fd_bmap, + hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) + hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location, + NULL, false); + if (clear_list) { hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - rule->location, NULL, false); hlist_del(&rule->rule_node); kfree(rule); - hdev->hclge_fd_rule_num--; } - } else { - hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, - rule_node) - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, -
rule->location, NULL, false); + hdev->fd_active_type = HCLGE_FD_RULE_NONE; + hdev->hclge_fd_rule_num = 0; + bitmap_zero(hdev->fd_bmap, + hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); } + + spin_unlock_bh(&hdev->fd_rule_lock); } static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@ -5283,6 +5428,7 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle) if (!hdev->fd_en) return 0; + spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); if (!ret) @@ -5292,11 +5438,18 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle) dev_warn(&hdev->pdev->dev, "Restore rule %d failed, remove it\n", rule->location); + clear_bit(rule->location, hdev->fd_bmap); hlist_del(&rule->rule_node); kfree(rule); hdev->hclge_fd_rule_num--; } } + + if (hdev->hclge_fd_rule_num) + hdev->fd_active_type = HCLGE_FD_EP_ACTIVE; + + spin_unlock_bh(&hdev->fd_rule_lock); + return 0; } @@ -5329,13 +5482,18 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, fs = (struct ethtool_rx_flow_spec *)&cmd->fs; + spin_lock_bh(&hdev->fd_rule_lock); + hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { if (rule->location >= fs->location) break; } - if (!rule || fs->location != rule->location) + if (!rule || fs->location != rule->location) { + spin_unlock_bh(&hdev->fd_rule_lock); + return -ENOENT; + } fs->flow_type = rule->flow_type; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { @@ -5343,16 +5501,16 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, case TCP_V4_FLOW: case UDP_V4_FLOW: fs->h_u.tcp_ip4_spec.ip4src = - cpu_to_be32(rule->tuples.src_ip[3]); + cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]); fs->m_u.tcp_ip4_spec.ip4src = - rule->unused_tuple & BIT(INNER_SRC_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.src_ip[3]); + rule->unused_tuple & BIT(INNER_SRC_IP) ? + 0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]); fs->h_u.tcp_ip4_spec.ip4dst = - cpu_to_be32(rule->tuples.dst_ip[3]); + cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]); fs->m_u.tcp_ip4_spec.ip4dst = - rule->unused_tuple & BIT(INNER_DST_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.dst_ip[3]); + rule->unused_tuple & BIT(INNER_DST_IP) ? + 0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]); fs->h_u.tcp_ip4_spec.psrc = cpu_to_be16(rule->tuples.src_port); fs->m_u.tcp_ip4_spec.psrc = @@ -5372,16 +5530,16 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, break; case IP_USER_FLOW: fs->h_u.usr_ip4_spec.ip4src = - cpu_to_be32(rule->tuples.src_ip[3]); + cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]); fs->m_u.tcp_ip4_spec.ip4src = - rule->unused_tuple & BIT(INNER_SRC_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.src_ip[3]); + rule->unused_tuple & BIT(INNER_SRC_IP) ? + 0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]); fs->h_u.usr_ip4_spec.ip4dst = - cpu_to_be32(rule->tuples.dst_ip[3]); + cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]); fs->m_u.usr_ip4_spec.ip4dst = - rule->unused_tuple & BIT(INNER_DST_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.dst_ip[3]); + rule->unused_tuple & BIT(INNER_DST_IP) ? 
+ 0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]); fs->h_u.usr_ip4_spec.tos = rule->tuples.ip_tos; fs->m_u.usr_ip4_spec.tos = @@ -5400,20 +5558,22 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, case TCP_V6_FLOW: case UDP_V6_FLOW: cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6src, - rule->tuples.src_ip, 4); + rule->tuples.src_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_SRC_IP)) - memset(fs->m_u.tcp_ip6_spec.ip6src, 0, sizeof(int) * 4); + memset(fs->m_u.tcp_ip6_spec.ip6src, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6src, - rule->tuples_mask.src_ip, 4); + rule->tuples_mask.src_ip, IPV6_SIZE); cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6dst, - rule->tuples.dst_ip, 4); + rule->tuples.dst_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_DST_IP)) - memset(fs->m_u.tcp_ip6_spec.ip6dst, 0, sizeof(int) * 4); + memset(fs->m_u.tcp_ip6_spec.ip6dst, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6dst, - rule->tuples_mask.dst_ip, 4); + rule->tuples_mask.dst_ip, IPV6_SIZE); fs->h_u.tcp_ip6_spec.psrc = cpu_to_be16(rule->tuples.src_port); fs->m_u.tcp_ip6_spec.psrc = @@ -5428,20 +5588,22 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, break; case IPV6_USER_FLOW: cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6src, - rule->tuples.src_ip, 4); + rule->tuples.src_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_SRC_IP)) - memset(fs->m_u.usr_ip6_spec.ip6src, 0, sizeof(int) * 4); + memset(fs->m_u.usr_ip6_spec.ip6src, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6src, - rule->tuples_mask.src_ip, 4); + rule->tuples_mask.src_ip, IPV6_SIZE); cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6dst, - rule->tuples.dst_ip, 4); + rule->tuples.dst_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_DST_IP)) - memset(fs->m_u.usr_ip6_spec.ip6dst, 0, sizeof(int) * 4); + memset(fs->m_u.usr_ip6_spec.ip6dst, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6dst, - rule->tuples_mask.dst_ip, 4); + rule->tuples_mask.dst_ip, IPV6_SIZE); fs->h_u.usr_ip6_spec.l4_proto = rule->tuples.ip_proto; fs->m_u.usr_ip6_spec.l4_proto = @@ -5474,6 +5636,7 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, break; default: + spin_unlock_bh(&hdev->fd_rule_lock); return -EOPNOTSUPP; } @@ -5505,6 +5668,8 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, fs->ring_cookie |= vf_id; } + spin_unlock_bh(&hdev->fd_rule_lock); + return 0; } @@ -5522,20 +5687,208 @@ static int hclge_get_all_rules(struct hnae3_handle *handle, cmd->data = hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]; + spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { - if (cnt == cmd->rule_cnt) + if (cnt == cmd->rule_cnt) { + spin_unlock_bh(&hdev->fd_rule_lock); return -EMSGSIZE; + } rule_locs[cnt] = rule->location; cnt++; } + spin_unlock_bh(&hdev->fd_rule_lock); + cmd->rule_cnt = cnt; return 0; } +static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys, + struct hclge_fd_rule_tuples *tuples) +{ + tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto); + tuples->ip_proto = fkeys->basic.ip_proto; + tuples->dst_port = be16_to_cpu(fkeys->ports.dst); + + if (fkeys->basic.n_proto == htons(ETH_P_IP)) { + tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src); + tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst); + } else { + memcpy(tuples->src_ip, + fkeys->addrs.v6addrs.src.in6_u.u6_addr32, + sizeof(tuples->src_ip)); + memcpy(tuples->dst_ip, + 
fkeys->addrs.v6addrs.dst.in6_u.u6_addr32, + sizeof(tuples->dst_ip)); + } +} + +/* traverse all rules, check whether an existing rule has the same tuples */ +static struct hclge_fd_rule * +hclge_fd_search_flow_keys(struct hclge_dev *hdev, + const struct hclge_fd_rule_tuples *tuples) +{ + struct hclge_fd_rule *rule = NULL; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (!memcmp(tuples, &rule->tuples, sizeof(*tuples))) + return rule; + } + + return NULL; +} + +static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples, + struct hclge_fd_rule *rule) +{ + rule->unused_tuple = BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | + BIT(INNER_VLAN_TAG_FST) | BIT(INNER_IP_TOS) | + BIT(INNER_SRC_PORT); + rule->action = 0; + rule->vf_id = 0; + rule->rule_type = HCLGE_FD_ARFS_ACTIVE; + if (tuples->ether_proto == ETH_P_IP) { + if (tuples->ip_proto == IPPROTO_TCP) + rule->flow_type = TCP_V4_FLOW; + else + rule->flow_type = UDP_V4_FLOW; + } else { + if (tuples->ip_proto == IPPROTO_TCP) + rule->flow_type = TCP_V6_FLOW; + else + rule->flow_type = UDP_V6_FLOW; + } + memcpy(&rule->tuples, tuples, sizeof(rule->tuples)); + memset(&rule->tuples_mask, 0xFF, sizeof(rule->tuples_mask)); +} + +static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, + u16 flow_id, struct flow_keys *fkeys) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_fd_rule_tuples new_tuples; + struct hclge_dev *hdev = vport->back; + struct hclge_fd_rule *rule; + u16 tmp_queue_id; + u16 bit_id; + int ret; + + if (!hnae3_dev_fd_supported(hdev)) + return -EOPNOTSUPP; + + memset(&new_tuples, 0, sizeof(new_tuples)); + hclge_fd_get_flow_tuples(fkeys, &new_tuples); + + spin_lock_bh(&hdev->fd_rule_lock); + + /* when there are already fd rules added by the user, + * arfs should not take effect + */ + if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) { + spin_unlock_bh(&hdev->fd_rule_lock); + + return -EOPNOTSUPP; + } + + /* check whether a flow director filter already exists for this flow: + * if not, create a new filter for it; + * if a filter exists with a different queue id, modify the filter; + * if a filter exists with the same queue id, do nothing + */ + rule = hclge_fd_search_flow_keys(hdev, &new_tuples); + if (!rule) { + bit_id = find_first_zero_bit(hdev->fd_bmap, MAX_FD_FILTER_NUM); + if (bit_id >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) { + spin_unlock_bh(&hdev->fd_rule_lock); + + return -ENOSPC; + } + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) { + spin_unlock_bh(&hdev->fd_rule_lock); + + return -ENOMEM; + } + + set_bit(bit_id, hdev->fd_bmap); + rule->location = bit_id; + rule->flow_id = flow_id; + rule->queue_id = queue_id; + hclge_fd_build_arfs_rule(&new_tuples, rule); + ret = hclge_fd_config_rule(hdev, rule); + + spin_unlock_bh(&hdev->fd_rule_lock); + + if (ret) + return ret; + + return rule->location; + } + + spin_unlock_bh(&hdev->fd_rule_lock); + + if (rule->queue_id == queue_id) + return rule->location; + + tmp_queue_id = rule->queue_id; + rule->queue_id = queue_id; + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) { + rule->queue_id = tmp_queue_id; + return ret; + } + + return rule->location; +} + +static void hclge_rfs_filter_expire(struct hclge_dev *hdev) +{ +#ifdef CONFIG_RFS_ACCEL + struct hnae3_handle *handle = &hdev->vport[0].nic; + struct hclge_fd_rule *rule; + struct hlist_node *node; + HLIST_HEAD(del_list); + + spin_lock_bh(&hdev->fd_rule_lock); + if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) { +
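/* Editor's aside: hclge_add_fd_entry_by_arfs() above follows a common
 * pattern -- look up a rule by its tuples, otherwise claim the first free
 * TCAM location from a bitmap (find_first_zero_bit() in the driver). A
 * self-contained userspace sketch of that pattern; all names here are
 * hypothetical, not driver symbols:
 */
#include <stdint.h>
#include <stdio.h>

#define RULE_NUM 64	/* stand-in for the stage-1 rule budget */

struct tuples { uint32_t saddr, daddr; uint16_t dport; };
struct rule { int used; struct tuples t; uint16_t queue; };

static struct rule table[RULE_NUM];
static uint64_t bmap;	/* one bit per TCAM location */

/* return the location of an existing rule for @t, or claim a free one */
static int rule_get(const struct tuples *t, uint16_t queue)
{
	int loc;

	for (loc = 0; loc < RULE_NUM; loc++)
		if (table[loc].used && table[loc].t.saddr == t->saddr &&
		    table[loc].t.daddr == t->daddr &&
		    table[loc].t.dport == t->dport)
			return loc;	/* already programmed */

	for (loc = 0; loc < RULE_NUM && (bmap & (1ULL << loc)); loc++)
		;
	if (loc == RULE_NUM)
		return -1;	/* table full, like the -ENOSPC path above */

	bmap |= 1ULL << loc;
	table[loc].used = 1;
	table[loc].t = *t;
	table[loc].queue = queue;
	return loc;
}

int main(void)
{
	struct tuples t = { 0x0a000001, 0x0a000002, 80 };

	printf("first add: loc %d\n", rule_get(&t, 3));	/* 0: new rule */
	printf("re-add:    loc %d\n", rule_get(&t, 3));	/* 0: found again */
	return 0;
}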
spin_unlock_bh(&hdev->fd_rule_lock); + return; + } + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rps_may_expire_flow(handle->netdev, rule->queue_id, + rule->flow_id, rule->location)) { + hlist_del_init(&rule->rule_node); + hlist_add_head(&rule->rule_node, &del_list); + hdev->hclge_fd_rule_num--; + clear_bit(rule->location, hdev->fd_bmap); + } + } + spin_unlock_bh(&hdev->fd_rule_lock); + + hlist_for_each_entry_safe(rule, node, &del_list, rule_node) { + hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + kfree(rule); + } +#endif +} + +static void hclge_clear_arfs_rules(struct hnae3_handle *handle) +{ +#ifdef CONFIG_RFS_ACCEL + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE) + hclge_del_all_fd_entries(handle, true); +#endif +} + static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -5565,10 +5918,12 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + bool clear; hdev->fd_en = enable; + clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE ? true : false; if (!enable) - hclge_del_all_fd_entries(handle, false); + hclge_del_all_fd_entries(handle, clear); else hclge_restore_fd_entries(handle); } @@ -5582,20 +5937,20 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false); - hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable); + + if (enable) { + hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, 1U); + } + req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -5726,7 +6081,7 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, return -EBUSY; } -static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, +static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, int stream_id, bool enable) { struct hclge_desc desc; @@ -5737,7 +6092,8 @@ static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); req->tqp_id = 
cpu_to_le16(tqp_id & HCLGE_RING_ID_MASK); req->stream_id = cpu_to_le16(stream_id); - req->enable |= enable << HCLGE_TQP_ENABLE_B; + if (enable) + req->enable |= 1U << HCLGE_TQP_ENABLE_B; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) @@ -5838,6 +6194,8 @@ static void hclge_ae_stop(struct hnae3_handle *handle) set_bit(HCLGE_STATE_DOWN, &hdev->state); + hclge_clear_arfs_rules(handle); + /* If it is not PF reset, the firmware will disable the MAC, * so it only need to stop phy here. */ @@ -5903,11 +6261,11 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport, if (op == HCLGE_MAC_VLAN_ADD) { if ((!resp_code) || (resp_code == 1)) { return_status = 0; - } else if (resp_code == 2) { + } else if (resp_code == HCLGE_ADD_UC_OVERFLOW) { return_status = -ENOSPC; dev_err(&hdev->pdev->dev, "add mac addr failed for uc_overflow.\n"); - } else if (resp_code == 3) { + } else if (resp_code == HCLGE_ADD_MC_OVERFLOW) { return_status = -ENOSPC; dev_err(&hdev->pdev->dev, "add mac addr failed for mc_overflow.\n"); @@ -5952,13 +6310,15 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport, static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr) { - int word_num; - int bit_num; +#define HCLGE_VF_NUM_IN_FIRST_DESC 192 + + unsigned int word_num; + unsigned int bit_num; if (vfid > 255 || vfid < 0) return -EIO; - if (vfid >= 0 && vfid <= 191) { + if (vfid >= 0 && vfid < HCLGE_VF_NUM_IN_FIRST_DESC) { word_num = vfid / 32; bit_num = vfid % 32; if (clr) @@ -5966,7 +6326,7 @@ static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr) else desc[1].data[word_num] |= cpu_to_le32(1 << bit_num); } else { - word_num = (vfid - 192) / 32; + word_num = (vfid - HCLGE_VF_NUM_IN_FIRST_DESC) / 32; bit_num = vfid % 32; if (clr) desc[2].data[word_num] &= cpu_to_le32(~(1 << bit_num)); @@ -6149,6 +6509,10 @@ static int hclge_init_umv_space(struct hclge_dev *hdev) mutex_init(&hdev->umv_mutex); hdev->max_umv_size = allocated_size; + /* divide max_umv_size by (hdev->num_req_vfs + 2), in order to + * preserve some unicast mac vlan table entries shared by pf + * and its vfs. + */ hdev->priv_umv_size = hdev->max_umv_size / (hdev->num_req_vfs + 2); hdev->share_umv_size = hdev->priv_umv_size + hdev->max_umv_size % (hdev->num_req_vfs + 2); @@ -6181,7 +6545,9 @@ static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size, req = (struct hclge_umv_spc_alc_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_ALLOCATE, false); - hnae3_set_bit(req->allocate, HCLGE_UMV_SPC_ALC_B, !is_alloc); + if (!is_alloc) + hnae3_set_bit(req->allocate, HCLGE_UMV_SPC_ALC_B, 1); + req->space_size = cpu_to_le32(space_size); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -6270,8 +6636,7 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, is_multicast_ether_addr(addr)) { dev_err(&hdev->pdev->dev, "Set_uc mac err! invalid mac:%pM. is_zero:%d,is_br=%d,is_mul=%d\n", - addr, - is_zero_ether_addr(addr), + addr, is_zero_ether_addr(addr), is_broadcast_ether_addr(addr), is_multicast_ether_addr(addr)); return -EINVAL; @@ -6338,9 +6703,8 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, if (is_zero_ether_addr(addr) || is_broadcast_ether_addr(addr) || is_multicast_ether_addr(addr)) { - dev_dbg(&hdev->pdev->dev, - "Remove mac err! invalid mac:%pM.\n", - addr); + dev_dbg(&hdev->pdev->dev, "Remove mac err! 
invalid mac:%pM.\n", + addr); return -EINVAL; } @@ -6381,18 +6745,16 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0); hclge_prepare_mac_addr(&req, addr, true); status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true); - if (!status) { - /* This mac addr exist, update VFID for it */ - hclge_update_desc_vfid(desc, vport->vport_id, false); - status = hclge_add_mac_vlan_tbl(vport, &req, desc); - } else { + if (status) { /* This mac addr do not exist, add new entry for it */ memset(desc[0].data, 0, sizeof(desc[0].data)); memset(desc[1].data, 0, sizeof(desc[0].data)); memset(desc[2].data, 0, sizeof(desc[0].data)); - hclge_update_desc_vfid(desc, vport->vport_id, false); - status = hclge_add_mac_vlan_tbl(vport, &req, desc); } + status = hclge_update_desc_vfid(desc, vport->vport_id, false); + if (status) + return status; + status = hclge_add_mac_vlan_tbl(vport, &req, desc); if (status == -ENOSPC) dev_err(&hdev->pdev->dev, "mc mac vlan table is full\n"); @@ -6430,7 +6792,9 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true); if (!status) { /* This mac addr exist, remove this handle's VFID for it */ - hclge_update_desc_vfid(desc, vport->vport_id, true); + status = hclge_update_desc_vfid(desc, vport->vport_id, true); + if (status) + return status; if (hclge_is_all_function_id_zero(desc)) /* All the vfid is zero, so need to delete this entry */ @@ -6759,7 +7123,7 @@ static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable) handle->netdev_flags &= ~HNAE3_VLAN_FLTR; } -static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, +static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, bool is_kill, u16 vlan, u8 qos, __be16 proto) { @@ -6771,6 +7135,12 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, u8 vf_byte_off; int ret; + /* if the vf vlan table is full, firmware will disable the vf vlan + * filter, so it is useless and unnecessary to add a new vlan id to + * the vf vlan filter + */ + if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) + return 0; + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_VLAN_FILTER_VF_CFG, false); hclge_cmd_setup_basic_desc(&desc[1], @@ -6806,6 +7176,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, return 0; if (req0->resp_code == HCLGE_VF_VLAN_NO_ENTRY) { + set_bit(vfid, hdev->vf_vlan_full); dev_warn(&hdev->pdev->dev, "vf vlan table is full, vf vlan filter is disabled\n"); return 0; @@ -6819,12 +7190,13 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, if (!req0->resp_code) return 0; - if (req0->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND) { - dev_warn(&hdev->pdev->dev, - "vlan %d filter is not in vf vlan table\n", - vlan); + /* the vf vlan filter is disabled once the vf vlan table is full, + * so a new vlan id will not be added into the vf vlan table. + * Just return 0 without warning, to avoid massive verbose + * logs at unload time.
+ */ + if (req0->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND) return 0; - } dev_err(&hdev->pdev->dev, "Kill vf vlan filter fail, ret =%d.\n", @@ -7140,10 +7512,6 @@ static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, { struct hclge_vport_vlan_cfg *vlan; - /* vlan 0 is reserved */ - if (!vlan_id) - return; - vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); if (!vlan) return; @@ -7238,6 +7606,43 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) mutex_unlock(&hdev->vport_cfg_mutex); } +static void hclge_restore_vlan_table(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; + u16 vlan_proto, qos; + u16 state, vlan_id; + int i; + + mutex_lock(&hdev->vport_cfg_mutex); + for (i = 0; i < hdev->num_alloc_vport; i++) { + vport = &hdev->vport[i]; + vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; + vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag; + qos = vport->port_base_vlan_cfg.vlan_info.qos; + state = vport->port_base_vlan_cfg.state; + + if (state != HNAE3_PORT_BASE_VLAN_DISABLE) { + hclge_set_vlan_filter_hw(hdev, htons(vlan_proto), + vport->vport_id, vlan_id, qos, + false); + continue; + } + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + if (vlan->hd_tbl_status) + hclge_set_vlan_filter_hw(hdev, + htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, 0, + false); + } + } + + mutex_unlock(&hdev->vport_cfg_mutex); +} + int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -7415,11 +7820,20 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, bool writen_to_tbl = false; int ret = 0; - /* when port based VLAN enabled, we use port based VLAN as the VLAN - * filter entry. In this case, we don't update VLAN filter table - * when user add new VLAN or remove exist VLAN, just update the vport - * VLAN list. The VLAN id in VLAN list won't be writen in VLAN filter - * table until port based VLAN disabled + /* When the device is resetting, firmware is unable to handle the + * mailbox. Just record the vlan id, and remove it after the + * reset has finished. + */ + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && is_kill) { + set_bit(vlan_id, vport->vlan_del_fail_bmap); + return -EBUSY; + } + + /* When port base vlan is enabled, we use port base vlan as the vlan + * filter entry. In this case, we don't update the vlan filter table + * when the user adds a new vlan or removes an existing one, just + * update the vport + * vlan list.
The vlan id in the vlan list won't be written into the vlan filter + * table until port base vlan is disabled */ if (handle->port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) { ret = hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, @@ -7427,16 +7841,53 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, writen_to_tbl = true; } - if (ret) - return ret; + if (!ret) { + if (is_kill) + hclge_rm_vport_vlan_table(vport, vlan_id, false); + else + hclge_add_vport_vlan_table(vport, vlan_id, + writen_to_tbl); + } else if (is_kill) { + /* When removing the hw vlan filter failed, record the vlan id, + * and try to remove it from hw later, to be consistent + * with the stack + */ + set_bit(vlan_id, vport->vlan_del_fail_bmap); + } + return ret; +} - if (is_kill) - hclge_rm_vport_vlan_table(vport, vlan_id, false); - else - hclge_add_vport_vlan_table(vport, vlan_id, - writen_to_tbl); +static void hclge_sync_vlan_filter(struct hclge_dev *hdev) +{ +#define HCLGE_MAX_SYNC_COUNT 60 - return 0; + int i, ret, sync_cnt = 0; + u16 vlan_id; + + /* start from vport 1 for PF is always alive */ + for (i = 0; i < hdev->num_alloc_vport; i++) { + struct hclge_vport *vport = &hdev->vport[i]; + + vlan_id = find_first_bit(vport->vlan_del_fail_bmap, + VLAN_N_VID); + while (vlan_id != VLAN_N_VID) { + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, vlan_id, + 0, true); + if (ret && ret != -EINVAL) + return; + + clear_bit(vlan_id, vport->vlan_del_fail_bmap); + hclge_rm_vport_vlan_table(vport, vlan_id, false); + + sync_cnt++; + if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) + return; + + vlan_id = find_first_bit(vport->vlan_del_fail_bmap, + VLAN_N_VID); + } + } } static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps) @@ -7463,7 +7914,7 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu) { struct hclge_dev *hdev = vport->back; - int i, max_frm_size, ret = 0; + int i, max_frm_size, ret; max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN; if (max_frm_size < HCLGE_MAC_MIN_FRAME || @@ -7523,7 +7974,8 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, req = (struct hclge_reset_tqp_queue_cmd *)desc.data; req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); - hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable); + if (enable) + hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, 1U); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -7574,7 +8026,7 @@ int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) int reset_try_times = 0; int reset_status; u16 queue_gid; - int ret = 0; + int ret; queue_gid = hclge_covert_handle_qid_global(handle, queue_id); @@ -7591,7 +8043,6 @@ int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) return ret; } - reset_try_times = 0; while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { /* Wait for tqp hw reset */ msleep(20); @@ -7630,7 +8081,6 @@ void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) return; } - reset_try_times = 0; while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { /* Wait for tqp hw reset */ msleep(20); @@ -7700,7 +8150,7 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev) { struct phy_device *phydev = hdev->hw.mac.phydev; u16 remote_advertising = 0; - u16 local_advertising = 0; + u16 local_advertising; u32 rx_pause, tx_pause; u8 flowctl; @@ -7733,8 +8183,9 @@ static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg, { struct hclge_vport *vport = hclge_get_vport(handle);
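/* Editor's aside: vlan_del_fail_bmap and hclge_sync_vlan_filter() above
 * implement a bounded retry loop -- failed deletions are parked in a bitmap
 * and drained later, at most HCLGE_MAX_SYNC_COUNT per pass. A userspace
 * sketch of the same pattern (first_set() mimics find_first_bit(), which
 * returns the bitmap size when no bit is set; names are hypothetical):
 */
#include <stdio.h>

#define N_VID		4096
#define WORD_BITS	(8 * sizeof(unsigned long))
#define MAX_SYNC	60

static unsigned long fail_bmap[N_VID / WORD_BITS];

static int first_set(void)
{
	int vid;

	for (vid = 0; vid < N_VID; vid++)
		if (fail_bmap[vid / WORD_BITS] & (1UL << (vid % WORD_BITS)))
			return vid;
	return N_VID;	/* no failed deletion pending */
}

static int hw_del_vlan(int vid)
{
	(void)vid;	/* pretend the firmware accepts it this time */
	return 0;
}

int main(void)
{
	int vid, sync_cnt = 0;

	fail_bmap[10 / WORD_BITS] |= 1UL << (10 % WORD_BITS);
	fail_bmap[67 / WORD_BITS] |= 1UL << (67 % WORD_BITS);

	for (vid = first_set(); vid != N_VID; vid = first_set()) {
		if (hw_del_vlan(vid))
			break;	/* keep it in the bitmap, retry next pass */
		fail_bmap[vid / WORD_BITS] &= ~(1UL << (vid % WORD_BITS));
		printf("retried and removed vlan %d\n", vid);
		if (++sync_cnt >= MAX_SYNC)
			break;	/* bound the work done in one pass */
	}
	return 0;
}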
struct hclge_dev *hdev = vport->back; + struct phy_device *phydev = hdev->hw.mac.phydev; - *auto_neg = hclge_get_autoneg(handle); + *auto_neg = phydev ? hclge_get_autoneg(handle) : 0; if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) { *rx_en = 0; @@ -7765,11 +8216,13 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, struct phy_device *phydev = hdev->hw.mac.phydev; u32 fc_autoneg; - fc_autoneg = hclge_get_autoneg(handle); - if (auto_neg != fc_autoneg) { - dev_info(&hdev->pdev->dev, - "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n"); - return -EOPNOTSUPP; + if (phydev) { + fc_autoneg = hclge_get_autoneg(handle); + if (auto_neg != fc_autoneg) { + dev_info(&hdev->pdev->dev, + "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n"); + return -EOPNOTSUPP; + } } if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) { @@ -7780,16 +8233,13 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, hclge_set_flowctrl_adv(hdev, rx_en, tx_en); - if (!fc_autoneg) + if (!auto_neg) return hclge_cfg_pauseparam(hdev, rx_en, tx_en); if (phydev) return phy_start_aneg(phydev); - if (hdev->pdev->revision == 0x20) - return -EOPNOTSUPP; - - return hclge_restart_autoneg(handle); + return -EOPNOTSUPP; } static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, @@ -7825,7 +8275,8 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle, struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; struct phy_device *phydev = hdev->hw.mac.phydev; - int mdix_ctrl, mdix, retval, is_resolved; + int mdix_ctrl, mdix, is_resolved; + unsigned int retval; if (!phydev) { *tp_mdix_ctrl = ETH_TP_MDI_INVALID; @@ -7894,6 +8345,102 @@ static void hclge_info_show(struct hclge_dev *hdev) dev_info(dev, "PF info end.\n"); } +static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev, + struct hclge_vport *vport) +{ + struct hnae3_client *client = vport->nic.client; + struct hclge_dev *hdev = ae_dev->priv; + int rst_cnt; + int ret; + + rst_cnt = hdev->rst_stats.reset_cnt; + ret = client->ops->init_instance(&vport->nic); + if (ret) + return ret; + + set_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + rst_cnt != hdev->rst_stats.reset_cnt) { + ret = -EBUSY; + goto init_nic_err; + } + + /* Enable nic hw error interrupts */ + ret = hclge_config_nic_hw_error(hdev, true); + if (ret) { + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable hw error interrupts\n", ret); + goto init_nic_err; + } + + hnae3_set_client_init_flag(client, ae_dev, 1); + + if (netif_msg_drv(&hdev->vport->nic)) + hclge_info_show(hdev); + + return ret; + +init_nic_err: + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + client->ops->uninit_instance(&vport->nic, 0); + + return ret; +} + +static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, + struct hclge_vport *vport) +{ + struct hnae3_client *client = vport->roce.client; + struct hclge_dev *hdev = ae_dev->priv; + int rst_cnt; + int ret; + + if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client || + !hdev->nic_client) + return 0; + + client = hdev->roce_client; + ret = hclge_init_roce_base_info(vport); + if (ret) + return ret; + + rst_cnt = hdev->rst_stats.reset_cnt; + ret = client->ops->init_instance(&vport->roce); + if (ret) + return ret; + + set_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + if 
(test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + rst_cnt != hdev->rst_stats.reset_cnt) { + ret = -EBUSY; + goto init_roce_err; + } + + /* Enable roce ras interrupts */ + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) { + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable roce ras interrupts\n", ret); + goto init_roce_err; + } + + hnae3_set_client_init_flag(client, ae_dev, 1); + + return 0; + +init_roce_err: + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + + return ret; +} + static int hclge_init_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { @@ -7909,41 +8456,13 @@ static int hclge_init_client_instance(struct hnae3_client *client, hdev->nic_client = client; vport->nic.client = client; - ret = client->ops->init_instance(&vport->nic); + ret = hclge_init_nic_client_instance(ae_dev, vport); if (ret) goto clear_nic; - hnae3_set_client_init_flag(client, ae_dev, 1); - - if (netif_msg_drv(&hdev->vport->nic)) - hclge_info_show(hdev); - - if (hdev->roce_client && - hnae3_dev_roce_supported(hdev)) { - struct hnae3_client *rc = hdev->roce_client; - - ret = hclge_init_roce_base_info(vport); - if (ret) - goto clear_roce; - - ret = rc->ops->init_instance(&vport->roce); - if (ret) - goto clear_roce; - - hnae3_set_client_init_flag(hdev->roce_client, - ae_dev, 1); - } - - break; - case HNAE3_CLIENT_UNIC: - hdev->nic_client = client; - vport->nic.client = client; - - ret = client->ops->init_instance(&vport->nic); + ret = hclge_init_roce_client_instance(ae_dev, vport); if (ret) - goto clear_nic; - - hnae3_set_client_init_flag(client, ae_dev, 1); + goto clear_roce; break; case HNAE3_CLIENT_ROCE: @@ -7952,17 +8471,9 @@ static int hclge_init_client_instance(struct hnae3_client *client, vport->roce.client = client; } - if (hdev->roce_client && hdev->nic_client) { - ret = hclge_init_roce_base_info(vport); - if (ret) - goto clear_roce; - - ret = client->ops->init_instance(&vport->roce); - if (ret) - goto clear_roce; - - hnae3_set_client_init_flag(client, ae_dev, 1); - } + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce; break; default: @@ -7970,7 +8481,7 @@ static int hclge_init_client_instance(struct hnae3_client *client, } } - return 0; + return ret; clear_nic: hdev->nic_client = NULL; @@ -7992,6 +8503,10 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { vport = &hdev->vport[i]; if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); hdev->roce_client = NULL; @@ -8000,6 +8515,10 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, if (client->type == HNAE3_CLIENT_ROCE) return; if (hdev->nic_client && client->ops->uninit_instance) { + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + client->ops->uninit_instance(&vport->nic, 0); hdev->nic_client = NULL; vport->nic.client = NULL; @@ -8081,6 +8600,7 @@ static void hclge_state_init(struct hclge_dev *hdev) static void hclge_state_uninit(struct hclge_dev *hdev) { set_bit(HCLGE_STATE_DOWN, &hdev->state); + set_bit(HCLGE_STATE_REMOVING, &hdev->state); if 
(hdev->service_timer.function) del_timer_sync(&hdev->service_timer); @@ -8122,6 +8642,23 @@ static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) set_bit(HNAE3_FLR_DONE, &hdev->flr_state); } +static void hclge_clear_resetting_state(struct hclge_dev *hdev) +{ + u16 i; + + for (i = 0; i < hdev->num_alloc_vport; i++) { + struct hclge_vport *vport = &hdev->vport[i]; + int ret; + + /* Send cmd to clear VF's FUNC_RST_ING */ + ret = hclge_set_vf_rst(hdev, vport->vport_id, false); + if (ret) + dev_warn(&hdev->pdev->dev, + "clear vf(%d) rst failed %d!\n", + vport->vport_id, ret); + } +} + static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; @@ -8143,6 +8680,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) mutex_init(&hdev->vport_lock); mutex_init(&hdev->vport_cfg_mutex); + spin_lock_init(&hdev->fd_rule_lock); ret = hclge_pci_init(hdev); if (ret) { @@ -8270,13 +8808,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_mdiobus_unreg; } - ret = hclge_hw_error_set_state(hdev, true); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to enable hw error interrupts\n", ret); - goto err_mdiobus_unreg; - } - INIT_KFIFO(hdev->mac_tnl_log); hclge_dcb_ops_set(hdev); @@ -8288,6 +8819,22 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task); hclge_clear_all_event_cause(hdev); + hclge_clear_resetting_state(hdev); + + /* Log and clear the hw errors that have already occurred */ + hclge_handle_all_hns_hw_errors(ae_dev); + + /* request a delayed reset for the error recovery, because an immediate + * global reset on one PF would affect the pending initialization of + * the other PFs + */ + if (ae_dev->hw_err_reset_req) { + enum hnae3_reset_type reset_level; + + reset_level = hclge_get_reset_level(ae_dev, + &ae_dev->hw_err_reset_req); + hclge_set_def_reset_request(ae_dev, reset_level); + mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL); + } /* Enable MISC vector(vector0) */ hclge_enable_vector(&hdev->misc_vector, true); @@ -8342,6 +8889,7 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_stats_clear(hdev); memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table)); + memset(hdev->vf_vlan_full, 0, sizeof(hdev->vf_vlan_full)); ret = hclge_cmd_init(hdev); if (ret) { @@ -8393,21 +8941,31 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) ret = hclge_init_fd_config(hdev); if (ret) { - dev_err(&pdev->dev, - "fd table init fail, ret=%d\n", ret); + dev_err(&pdev->dev, "fd table init fail, ret=%d\n", ret); return ret; } /* Re-enable the hw error interrupts because - * the interrupts get disabled on core/global reset. + * the interrupts get disabled on global reset.
*/ - ret = hclge_hw_error_set_state(hdev, true); + ret = hclge_config_nic_hw_error(hdev, true); if (ret) { dev_err(&pdev->dev, - "fail(%d) to re-enable HNS hw error interrupts\n", ret); + "fail(%d) to re-enable NIC hw error interrupts\n", + ret); return ret; } + if (hdev->roce_client) { + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) { + dev_err(&pdev->dev, + "fail(%d) to re-enable roce ras interrupts\n", + ret); + return ret; + } + } + hclge_reset_vport_state(hdev); dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", @@ -8432,8 +8990,11 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_enable_vector(&hdev->misc_vector, false); synchronize_irq(hdev->misc_vector.vector_irq); + /* Disable all hw interrupts */ hclge_config_mac_tnl_int(hdev, false); - hclge_hw_error_set_state(hdev, false); + hclge_config_nic_hw_error(hdev, false); + hclge_config_rocee_ras_interrupt(hdev, false); + hclge_cmd_uninit(hdev); hclge_misc_irq_uninit(hdev); hclge_pci_uninit(hdev); @@ -8478,15 +9039,16 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, { struct hclge_vport *vport = hclge_get_vport(handle); struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + u16 tc_offset[HCLGE_MAX_TC_NUM] = {0}; struct hclge_dev *hdev = vport->back; + u16 tc_size[HCLGE_MAX_TC_NUM] = {0}; int cur_rss_size = kinfo->rss_size; int cur_tqps = kinfo->num_tqps; - u16 tc_offset[HCLGE_MAX_TC_NUM]; u16 tc_valid[HCLGE_MAX_TC_NUM]; - u16 tc_size[HCLGE_MAX_TC_NUM]; u16 roundup_size; u32 *rss_indir; - int ret, i; + unsigned int i; + int ret; kinfo->req_rss_size = new_tqps_num; @@ -8571,10 +9133,12 @@ static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, void *data) { #define HCLGE_32_BIT_REG_RTN_DATANUM 8 +#define HCLGE_32_BIT_DESC_NODATA_LEN 2 struct hclge_desc *desc; u32 *reg_val = data; __le32 *desc_data; + int nodata_num; int cmd_num; int i, k, n; int ret; @@ -8582,7 +9146,9 @@ static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, if (regs_num == 0) return 0; - cmd_num = DIV_ROUND_UP(regs_num + 2, HCLGE_32_BIT_REG_RTN_DATANUM); + nodata_num = HCLGE_32_BIT_DESC_NODATA_LEN; + cmd_num = DIV_ROUND_UP(regs_num + nodata_num, + HCLGE_32_BIT_REG_RTN_DATANUM); desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc) return -ENOMEM; @@ -8599,7 +9165,7 @@ static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, for (i = 0; i < cmd_num; i++) { if (i == 0) { desc_data = (__le32 *)(&desc[i].data[0]); - n = HCLGE_32_BIT_REG_RTN_DATANUM - 2; + n = HCLGE_32_BIT_REG_RTN_DATANUM - nodata_num; } else { desc_data = (__le32 *)(&desc[i]); n = HCLGE_32_BIT_REG_RTN_DATANUM; @@ -8621,10 +9187,12 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, void *data) { #define HCLGE_64_BIT_REG_RTN_DATANUM 4 +#define HCLGE_64_BIT_DESC_NODATA_LEN 1 struct hclge_desc *desc; u64 *reg_val = data; __le64 *desc_data; + int nodata_len; int cmd_num; int i, k, n; int ret; @@ -8632,7 +9200,9 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, if (regs_num == 0) return 0; - cmd_num = DIV_ROUND_UP(regs_num + 1, HCLGE_64_BIT_REG_RTN_DATANUM); + nodata_len = HCLGE_64_BIT_DESC_NODATA_LEN; + cmd_num = DIV_ROUND_UP(regs_num + nodata_len, + HCLGE_64_BIT_REG_RTN_DATANUM); desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc) return -ENOMEM; @@ -8649,7 +9219,7 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, for (i = 0; i < cmd_num; i++) { if (i == 0) { 
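/* Editor's aside: a quick check of the descriptor math in the two
 * register-dump helpers above. Each descriptor carries DATANUM words, but
 * the first descriptor loses NODATA words (per the nodata_num/nodata_len
 * handling): 8/2 for the 32-bit dump, 4/1 for the 64-bit dump, hence
 * cmd_num = DIV_ROUND_UP(regs_num + NODATA, DATANUM):
 */
#include <stdio.h>

static unsigned int cmd_num(unsigned int regs, unsigned int datanum,
			    unsigned int nodata)
{
	/* open-coded DIV_ROUND_UP(regs + nodata, datanum) */
	return (regs + nodata + datanum - 1) / datanum;
}

int main(void)
{
	unsigned int n;

	n = cmd_num(21, 8, 2);	/* 32-bit dump: (21 + 2 + 7) / 8 = 3 */
	printf("21 x u32 -> %u descs, capacity %u regs\n", n, n * 8 - 2);

	n = cmd_num(21, 4, 1);	/* 64-bit dump: (21 + 1 + 3) / 4 = 6 */
	printf("21 x u64 -> %u descs, capacity %u regs\n", n, n * 4 - 1);
	return 0;
}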
desc_data = (__le64 *)(&desc[i].data[0]); - n = HCLGE_64_BIT_REG_RTN_DATANUM - 1; + n = HCLGE_64_BIT_REG_RTN_DATANUM - nodata_len; } else { desc_data = (__le64 *)(&desc[i]); n = HCLGE_64_BIT_REG_RTN_DATANUM; @@ -8876,6 +9446,7 @@ static const struct hnae3_ae_ops hclge_ops = { .set_autoneg = hclge_set_autoneg, .get_autoneg = hclge_get_autoneg, .restart_autoneg = hclge_restart_autoneg, + .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, .set_mtu = hclge_set_mtu, @@ -8892,6 +9463,7 @@ static const struct hnae3_ae_ops hclge_ops = { .set_vf_vlan_filter = hclge_set_vf_vlan_filter, .enable_hw_strip_rxvtag = hclge_en_hw_strip_rxvtag, .reset_event = hclge_reset_event, + .get_reset_level = hclge_get_reset_level, .set_default_reset_request = hclge_set_def_reset_request, .get_tqps_and_rss_info = hclge_get_tqps_and_rss_info, .set_channels = hclge_set_channels, @@ -8908,6 +9480,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_fd_all_rules = hclge_get_all_rules, .restore_fd_rules = hclge_restore_fd_entries, .enable_fd = hclge_enable_fd, + .add_arfs_entry = hclge_add_fd_entry_by_arfs, .dbg_run_cmd = hclge_dbg_run_cmd, .handle_hw_ras_error = hclge_handle_hw_ras_error, .get_hw_reset_stat = hclge_get_hw_reset_stat, @@ -8918,6 +9491,7 @@ static const struct hnae3_ae_ops hclge_ops = { .set_timer_task = hclge_set_timer_task, .mac_connect_phy = hclge_mac_connect_phy, .mac_disconnect_phy = hclge_mac_disconnect_phy, + .restore_vlan_table = hclge_restore_vlan_table, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index dd06b11187b0..6a12285f4c76 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -201,6 +201,8 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_DOWN, HCLGE_STATE_DISABLED, HCLGE_STATE_REMOVING, + HCLGE_STATE_NIC_REGISTERED, + HCLGE_STATE_ROCE_REGISTERED, HCLGE_STATE_SERVICE_INITED, HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_RST_SERVICE_SCHED, @@ -472,6 +474,7 @@ enum HCLGE_FD_KEY_TYPE { enum HCLGE_FD_STAGE { HCLGE_FD_STAGE_1, HCLGE_FD_STAGE_2, + MAX_STAGE_NUM, }; /* OUTER_XXX indicates tuples in tunnel header of tunnel packet @@ -526,7 +529,7 @@ enum HCLGE_FD_META_DATA { struct key_info { u8 key_type; - u8 key_length; + u8 key_length; /* use bit as unit */ }; static const struct key_info meta_data_key_info[] = { @@ -578,6 +581,16 @@ static const struct key_info tuple_key_info[] = { #define MAX_KEY_BYTES (MAX_KEY_DWORDS * 4) #define MAX_META_DATA_LENGTH 32 +/* assigned by firmware, the real filter number for each pf may be less */ +#define MAX_FD_FILTER_NUM 4096 +#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5 + +enum HCLGE_FD_ACTIVE_RULE_TYPE { + HCLGE_FD_RULE_NONE, + HCLGE_FD_ARFS_ACTIVE, + HCLGE_FD_EP_ACTIVE, +}; + enum HCLGE_FD_PACKET_TYPE { NIC_PACKET, ROCE_PACKET, @@ -600,18 +613,23 @@ struct hclge_fd_key_cfg { struct hclge_fd_cfg { u8 fd_mode; - u16 max_key_length; + u16 max_key_length; /* use bit as unit */ u32 proto_support; - u32 rule_num[2]; /* rule entry number */ - u16 cnt_num[2]; /* rule hit counter number */ - struct hclge_fd_key_cfg key_cfg[2]; + u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */ + u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */ + struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM]; }; +#define IPV4_INDEX 3 +#define IPV6_SIZE 4 struct hclge_fd_rule_tuples { - u8 src_mac[6]; - u8 dst_mac[6]; - u32 src_ip[4]; 
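/* Editor's aside: IPV4_INDEX and IPV6_SIZE above encode one convention --
 * a single u32[IPV6_SIZE] array holds either a whole IPv6 address or, for
 * IPv4, only the last word (index IPV4_INDEX == 3), with the other words
 * left zero, as the struct comment below states. A minimal illustration:
 */
#include <stdint.h>
#include <stdio.h>

#define IPV4_INDEX	3
#define IPV6_SIZE	4

int main(void)
{
	uint32_t src_ip[IPV6_SIZE] = { 0 };

	src_ip[IPV4_INDEX] = 0x0a000001;	/* 10.0.0.1 in host order */
	printf("v4 word %08x, unused words %x %x %x\n",
	       src_ip[IPV4_INDEX], src_ip[0], src_ip[1], src_ip[2]);
	return 0;
}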
- u32 dst_ip[4]; + u8 src_mac[ETH_ALEN]; + u8 dst_mac[ETH_ALEN]; + /* Be compatible for ip address of both ipv4 and ipv6. + * For ipv4 address, we store it in src/dst_ip[3]. + */ + u32 src_ip[IPV6_SIZE]; + u32 dst_ip[IPV6_SIZE]; u16 src_port; u16 dst_port; u16 vlan_tag1; @@ -630,6 +648,8 @@ struct hclge_fd_rule { u16 vf_id; u16 queue_id; u16 location; + u16 flow_id; /* only used for arfs */ + enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type; }; struct hclge_fd_ad_data { @@ -679,6 +699,20 @@ struct hclge_mac_tnl_stats { u32 status; }; +#define HCLGE_RESET_INTERVAL (10 * HZ) +#define HCLGE_WAIT_RESET_DONE 100 + +#pragma pack(1) +struct hclge_vf_vlan_cfg { + u8 mbx_cmd; + u8 subcode; + u8 is_kill; + u16 vlan; + u16 proto; +}; + +#pragma pack() + /* For each bit of TCAM entry, it uses a pair of 'x' and * 'y' to indicate which value to match, like below: * ---------------------------------- @@ -806,10 +840,15 @@ struct hclge_dev { struct hclge_vlan_type_cfg vlan_type_cfg; unsigned long vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)]; + unsigned long vf_vlan_full[BITS_TO_LONGS(HCLGE_VPORT_NUM)]; struct hclge_fd_cfg fd_cfg; struct hlist_head fd_rule_list; + spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */ u16 hclge_fd_rule_num; + u16 fd_arfs_expire_timer; + unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)]; + enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type; u8 fd_en; u16 wanted_umv_size; @@ -891,13 +930,14 @@ struct hclge_vport { u32 bw_limit; /* VSI BW Limit (0 = disabled) */ u8 dwrr; + unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; struct hclge_port_base_vlan_config port_base_vlan_cfg; struct hclge_tx_vtag_cfg txvlan_cfg; struct hclge_rx_vtag_cfg rxvlan_cfg; u16 used_umv_num; - int vport_id; + u16 vport_id; struct hclge_dev *back; /* Back reference to associated dev */ struct hnae3_handle nic; struct hnae3_handle roce; @@ -959,7 +999,7 @@ int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id); int hclge_vport_start(struct hclge_vport *vport); void hclge_vport_stop(struct hclge_vport *vport); int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu); -int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf); +int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf); u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id); int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 0e04e63f2a94..a38ac7cfe16b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -29,6 +29,10 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, "PF fail to gen resp to VF len %d exceeds max len %d\n", resp_data_len, HCLGE_MBX_MAX_RESP_DATA_SIZE); + /* If resp_data_len is too long, set the value to max length + * and return the msg to VF + */ + resp_data_len = HCLGE_MBX_MAX_RESP_DATA_SIZE; } hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false); @@ -93,7 +97,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) else if (hdev->reset_type == HNAE3_FLR_RESET) reset_type = HNAE3_VF_FULL_RESET; else - return -EINVAL; + reset_type = HNAE3_VF_FUNC_RESET; memcpy(&msg_data[0], &reset_type, sizeof(u16)); @@ -192,12 +196,10 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, return ret; ret = hclge_bind_ring_with_vector(vport, vector_id, en, 
&ring_chain); - if (ret) - return ret; hclge_free_vector_ring_chain(&ring_chain); - return 0; + return ret; } static int hclge_set_vf_promisc_mode(struct hclge_vport *vport, @@ -308,21 +310,23 @@ int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid, static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req) { + struct hclge_vf_vlan_cfg *msg_cmd; int status = 0; - if (mbx_req->msg[1] == HCLGE_MBX_VLAN_FILTER) { + msg_cmd = (struct hclge_vf_vlan_cfg *)mbx_req->msg; + if (msg_cmd->subcode == HCLGE_MBX_VLAN_FILTER) { struct hnae3_handle *handle = &vport->nic; u16 vlan, proto; bool is_kill; - is_kill = !!mbx_req->msg[2]; - memcpy(&vlan, &mbx_req->msg[3], sizeof(vlan)); - memcpy(&proto, &mbx_req->msg[5], sizeof(proto)); + is_kill = !!msg_cmd->is_kill; + vlan = msg_cmd->vlan; + proto = msg_cmd->proto; status = hclge_set_vlan_filter(handle, cpu_to_be16(proto), vlan, is_kill); - } else if (mbx_req->msg[1] == HCLGE_MBX_VLAN_RX_OFF_CFG) { + } else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) { struct hnae3_handle *handle = &vport->nic; - bool en = mbx_req->msg[2] ? true : false; + bool en = msg_cmd->is_kill ? true : false; status = hclge_en_hw_strip_rxvtag(handle, en); } else if (mbx_req->msg[1] == HCLGE_MBX_PORT_BASE_VLAN_CFG) { @@ -365,13 +369,14 @@ static int hclge_get_vf_tcinfo(struct hclge_vport *vport, { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; u8 vf_tc_map = 0; - int i, ret; + unsigned int i; + int ret; for (i = 0; i < kinfo->num_tc; i++) vf_tc_map |= BIT(i); ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &vf_tc_map, - sizeof(u8)); + sizeof(vf_tc_map)); return ret; } @@ -553,7 +558,8 @@ void hclge_mbx_handler(struct hclge_dev *hdev) struct hclge_mbx_vf_to_pf_cmd *req; struct hclge_vport *vport; struct hclge_desc *desc; - int ret, flag; + unsigned int flag; + int ret; /* handle all the mailbox requests in the queue */ while (!hclge_cmd_crq_empty(&hdev->hw)) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 1e8134892d77..abb1b438564e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -55,9 +55,9 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum, mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data; hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M, - HCLGE_MDIO_PHYID_S, phyid); + HCLGE_MDIO_PHYID_S, (u32)phyid); hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M, - HCLGE_MDIO_PHYREG_S, regnum); + HCLGE_MDIO_PHYREG_S, (u32)regnum); hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1); hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M, @@ -93,9 +93,9 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum) mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data; hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M, - HCLGE_MDIO_PHYID_S, phyid); + HCLGE_MDIO_PHYID_S, (u32)phyid); hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M, - HCLGE_MDIO_PHYREG_S, regnum); + HCLGE_MDIO_PHYREG_S, (u32)regnum); hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1); hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M, @@ -224,6 +224,13 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle) linkmode_and(phydev->supported, phydev->supported, mask); linkmode_copy(phydev->advertising, phydev->supported); + /* supported flag is Pause and Asym Pause, but default advertising + * should be rx on, tx on, 
so we need to clear Asym Pause from the advertising + * flags + */ + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->advertising); + return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index a7bbb6d3091a..3f41fa2bc414 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -43,18 +43,23 @@ enum hclge_shaper_level { static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, u8 *ir_b, u8 *ir_u, u8 *ir_s) { +#define DIVISOR_CLK (1000 * 8) +#define DIVISOR_IR_B_126 (126 * DIVISOR_CLK) + const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = { 6 * 256, /* Priority level */ 6 * 32, /* Priority group level */ 6 * 8, /* Port level */ 6 * 256 /* Qset level */ }; - u8 ir_u_calc = 0, ir_s_calc = 0; + u8 ir_u_calc = 0; + u8 ir_s_calc = 0; u32 ir_calc; u32 tick; /* Calc tick */ - if (shaper_level >= HCLGE_SHAPER_LVL_CNT) + if (shaper_level >= HCLGE_SHAPER_LVL_CNT || + ir > HCLGE_ETHER_MAX_RATE) return -EINVAL; tick = tick_array[shaper_level]; @@ -66,7 +71,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, * ir_calc = ---------------- * 1000 * tick * 1 */ - ir_calc = (1008000 + (tick >> 1) - 1) / tick; + ir_calc = (DIVISOR_IR_B_126 + (tick >> 1) - 1) / tick; if (ir_calc == ir) { *ir_b = 126; @@ -78,27 +83,28 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, /* Increasing the denominator to select ir_s value */ while (ir_calc > ir) { ir_s_calc++; - ir_calc = 1008000 / (tick * (1 << ir_s_calc)); + ir_calc = DIVISOR_IR_B_126 / (tick * (1 << ir_s_calc)); } if (ir_calc == ir) *ir_b = 126; else - *ir_b = (ir * tick * (1 << ir_s_calc) + 4000) / 8000; + *ir_b = (ir * tick * (1 << ir_s_calc) + + (DIVISOR_CLK >> 1)) / DIVISOR_CLK; } else { /* Increasing the numerator to select ir_u value */ u32 numerator; while (ir_calc < ir) { ir_u_calc++; - numerator = 1008000 * (1 << ir_u_calc); + numerator = DIVISOR_IR_B_126 * (1 << ir_u_calc); ir_calc = (numerator + (tick >> 1)) / tick; } if (ir_calc == ir) { *ir_b = 126; } else { - u32 denominator = (8000 * (1 << --ir_u_calc)); + u32 denominator = (DIVISOR_CLK * (1 << --ir_u_calc)); *ir_b = (ir * tick + (denominator >> 1)) / denominator; } } @@ -119,14 +125,13 @@ static int hclge_pfc_stats_get(struct hclge_dev *hdev, opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT)) return -EINVAL; - for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) { + for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1; i++) { hclge_cmd_setup_basic_desc(&desc[i], opcode, true); - if (i != (HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1)) - desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); } + hclge_cmd_setup_basic_desc(&desc[i], opcode, true); + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM); if (ret) return ret; @@ -219,8 +224,7 @@ int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr) trans_gap = pause_param->pause_trans_gap; trans_time = le16_to_cpu(pause_param->pause_trans_time); - return hclge_pause_param_cfg(hdev, mac_addr, trans_gap, - trans_time); + return hclge_pause_param_cfg(hdev, mac_addr, trans_gap, trans_time); } static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id) @@ -361,29 +365,36 @@ static int hclge_tm_qs_weight_cfg(struct hclge_dev *hdev, u16 qs_id, return hclge_cmd_send(&hdev->hw, &desc, 1); } +static u32 hclge_tm_get_shapping_para(u8 ir_b, u8 ir_u, u8 ir_s, + u8 bs_b, u8 
bs_s) +{ + u32 shapping_para = 0; + + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, bs_b); + hclge_tm_set_field(shapping_para, BS_S, bs_s); + + return shapping_para; +} + static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pg_id, - u8 ir_b, u8 ir_u, u8 ir_s, u8 bs_b, u8 bs_s) + u32 shapping_para) { struct hclge_pg_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct hclge_desc desc; - u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING : - HCLGE_OPC_TM_PG_C_SHAPPING; + HCLGE_OPC_TM_PG_C_SHAPPING; hclge_cmd_setup_basic_desc(&desc, opcode, false); shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data; shap_cfg_cmd->pg_id = pg_id; - hclge_tm_set_field(shapping_para, IR_B, ir_b); - hclge_tm_set_field(shapping_para, IR_U, ir_u); - hclge_tm_set_field(shapping_para, IR_S, ir_s); - hclge_tm_set_field(shapping_para, BS_B, bs_b); - hclge_tm_set_field(shapping_para, BS_S, bs_s); - shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); @@ -397,7 +408,7 @@ static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) u8 ir_u, ir_b, ir_s; int ret; - ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE, + ret = hclge_shaper_para_calc(hdev->hw.mac.speed, HCLGE_SHAPER_LVL_PORT, &ir_b, &ir_u, &ir_s); if (ret) @@ -406,11 +417,9 @@ static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false); shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data; - hclge_tm_set_field(shapping_para, IR_B, ir_b); - hclge_tm_set_field(shapping_para, IR_U, ir_u); - hclge_tm_set_field(shapping_para, IR_S, ir_s); - hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF); - hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF); + shapping_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para); @@ -419,16 +428,14 @@ static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pri_id, - u8 ir_b, u8 ir_u, u8 ir_s, - u8 bs_b, u8 bs_s) + u32 shapping_para) { struct hclge_pri_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct hclge_desc desc; - u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING : - HCLGE_OPC_TM_PRI_C_SHAPPING; + HCLGE_OPC_TM_PRI_C_SHAPPING; hclge_cmd_setup_basic_desc(&desc, opcode, false); @@ -436,12 +443,6 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pri_id = pri_id; - hclge_tm_set_field(shapping_para, IR_B, ir_b); - hclge_tm_set_field(shapping_para, IR_U, ir_u); - hclge_tm_set_field(shapping_para, IR_S, ir_s); - hclge_tm_set_field(shapping_para, BS_B, bs_b); - hclge_tm_set_field(shapping_para, BS_S, bs_s); - shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); @@ -531,6 +532,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) max_rss_size = min_t(u16, hdev->rss_size_max, vport->alloc_tqps / kinfo->num_tc); + /* Set to user value, no larger than max_rss_size. 
*/ if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size && kinfo->req_rss_size <= max_rss_size) { dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", @@ -538,6 +540,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->rss_size = kinfo->req_rss_size; } else if (kinfo->rss_size > max_rss_size || (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { + /* Set to the maximum specification value (max_rss_size). */ dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", kinfo->rss_size, max_rss_size); kinfo->rss_size = max_rss_size; @@ -595,8 +598,10 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) hdev->tm_info.prio_tc[i] = (i >= hdev->tm_info.num_tc) ? 0 : i; - /* DCB is enabled if we have more than 1 TC */ - if (hdev->tm_info.num_tc > 1) + /* DCB is enabled if we have more than 1 TC or pfc_en is + * non-zero. + */ + if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) hdev->flag |= HCLGE_FLAG_DCB_ENABLE; else hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; @@ -604,12 +609,14 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) static void hclge_tm_pg_info_init(struct hclge_dev *hdev) { +#define BW_PERCENT 100 + u8 i; for (i = 0; i < hdev->tm_info.num_pg; i++) { int k; - hdev->tm_info.pg_dwrr[i] = i ? 0 : 100; + hdev->tm_info.pg_dwrr[i] = i ? 0 : BW_PERCENT; hdev->tm_info.pg_info[i].pg_id = i; hdev->tm_info.pg_info[i].pg_sch_mode = HCLGE_SCH_MODE_DWRR; @@ -621,7 +628,7 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map; for (k = 0; k < hdev->tm_info.num_tc; k++) - hdev->tm_info.pg_info[i].tc_dwrr[k] = 100; + hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT; } } @@ -682,6 +689,7 @@ static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev) static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) { u8 ir_u, ir_b, ir_s; + u32 shaper_para; int ret; u32 i; @@ -699,18 +707,21 @@ static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(0, 0, 0, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pg_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i, - 0, 0, 0, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para); if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pg_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i, - ir_b, ir_u, ir_s, - HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para); if (ret) return ret; } @@ -730,8 +741,7 @@ static int hclge_tm_pg_dwrr_cfg(struct hclge_dev *hdev) /* pg to prio */ for (i = 0; i < hdev->tm_info.num_pg; i++) { /* Cfg dwrr */ - ret = hclge_tm_pg_weight_cfg(hdev, i, - hdev->tm_info.pg_dwrr[i]); + ret = hclge_tm_pg_weight_cfg(hdev, i, hdev->tm_info.pg_dwrr[i]); if (ret) return ret; } @@ -811,6 +821,7 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) { u8 ir_u, ir_b, ir_s; + u32 shaper_para; int ret; u32 i; @@ -822,17 +833,19 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_pri_shapping_cfg( - hdev, HCLGE_TM_SHAP_C_BUCKET, i, - 0, 0, 0, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para = hclge_tm_get_shapping_para(0, 0, 0, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); + ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i, + shaper_para); if (ret) return ret; - ret = hclge_tm_pri_shapping_cfg( 
- hdev, HCLGE_TM_SHAP_P_BUCKET, i, - ir_b, ir_u, ir_s, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); + ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i, + shaper_para); if (ret) return ret; } @@ -844,6 +857,7 @@ static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; u8 ir_u, ir_b, ir_s; + u32 shaper_para; int ret; ret = hclge_shaper_para_calc(vport->bw_limit, HCLGE_SHAPER_LVL_VF, @@ -851,18 +865,19 @@ static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(0, 0, 0, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, - vport->vport_id, - 0, 0, 0, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + vport->vport_id, shaper_para); if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, - vport->vport_id, - ir_b, ir_u, ir_s, - HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + vport->vport_id, shaper_para); if (ret) return ret; @@ -964,7 +979,7 @@ static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) struct hclge_ets_tc_weight_cmd *ets_weight; struct hclge_desc desc; - int i; + unsigned int i; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ETS_TC_WEIGHT, false); ets_weight = (struct hclge_ets_tc_weight_cmd *)desc.data; @@ -1124,6 +1139,9 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport) int ret; u8 i; + if (vport->vport_id >= HNAE3_MAX_TC) + return -EINVAL; + ret = hclge_tm_pri_schd_mode_cfg(hdev, vport->vport_id); if (ret) return ret; @@ -1212,8 +1230,8 @@ static int hclge_pause_param_setup_hw(struct hclge_dev *hdev) struct hclge_mac *mac = &hdev->hw.mac; return hclge_pause_param_cfg(hdev, mac->mac_addr, - HCLGE_DEFAULT_PAUSE_TRANS_GAP, - HCLGE_DEFAULT_PAUSE_TRANS_TIME); + HCLGE_DEFAULT_PAUSE_TRANS_GAP, + HCLGE_DEFAULT_PAUSE_TRANS_TIME); } static int hclge_pfc_setup_hw(struct hclge_dev *hdev) @@ -1358,7 +1376,8 @@ void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc) void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) { - u8 i, bit_map = 0; + u8 bit_map = 0; + u8 i; hdev->tm_info.num_tc = num_tc; @@ -1375,6 +1394,19 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) hclge_tm_schd_info_init(hdev); } +void hclge_tm_pfc_info_update(struct hclge_dev *hdev) +{ + /* DCB is enabled if we have more than 1 TC or pfc_en is + * non-zero. 
+ */ + if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) + hdev->flag |= HCLGE_FLAG_DCB_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + + hclge_pfc_info_init(hdev); +} + int hclge_tm_init_hw(struct hclge_dev *hdev, bool init) { int ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index f60e540c7a62..818610988d34 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -12,7 +12,7 @@ #define HCLGE_TM_PORT_BASE_MODE_MSK BIT(0) -#define HCLGE_DEFAULT_PAUSE_TRANS_GAP 0xFF +#define HCLGE_DEFAULT_PAUSE_TRANS_GAP 0x7F #define HCLGE_DEFAULT_PAUSE_TRANS_TIME 0xFFFF /* SP or DWRR */ @@ -147,6 +147,7 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init); int hclge_tm_schd_setup_hw(struct hclge_dev *hdev); void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc); +void hclge_tm_pfc_info_update(struct hclge_dev *hdev); int hclge_tm_dwrr_cfg(struct hclge_dev *hdev); int hclge_tm_init_hw(struct hclge_dev *hdev, bool init); int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile index 6193f8fa7cf3..53804d95ea90 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile @@ -6,4 +6,4 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o -hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o
\ No newline at end of file +hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 71f356fc2446..652b796044e3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -98,7 +98,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring) hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val); reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S); - reg_val |= HCLGEVF_NIC_CMQ_ENABLE; hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, reg_val); hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0); @@ -110,7 +109,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring) hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val); reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S); - reg_val |= HCLGEVF_NIC_CMQ_ENABLE; hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, reg_val); hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0); @@ -179,6 +177,38 @@ void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc, desc->flag &= cpu_to_le16(~HCLGEVF_CMD_FLAG_WR); } +static int hclgevf_cmd_convert_err_code(u16 desc_ret) +{ + switch (desc_ret) { + case HCLGEVF_CMD_EXEC_SUCCESS: + return 0; + case HCLGEVF_CMD_NO_AUTH: + return -EPERM; + case HCLGEVF_CMD_NOT_SUPPORTED: + return -EOPNOTSUPP; + case HCLGEVF_CMD_QUEUE_FULL: + return -EXFULL; + case HCLGEVF_CMD_NEXT_ERR: + return -ENOSR; + case HCLGEVF_CMD_UNEXE_ERR: + return -ENOTBLK; + case HCLGEVF_CMD_PARA_ERR: + return -EINVAL; + case HCLGEVF_CMD_RESULT_ERR: + return -ERANGE; + case HCLGEVF_CMD_TIMEOUT: + return -ETIME; + case HCLGEVF_CMD_HILINK_ERR: + return -ENOLINK; + case HCLGEVF_CMD_QUEUE_ILLEGAL: + return -ENXIO; + case HCLGEVF_CMD_INVALID: + return -EBADR; + default: + return -EIO; + } +} + /* hclgevf_cmd_send - send command to command queue * @hw: pointer to the hw struct * @desc: prefilled descriptor for describing the command @@ -190,6 +220,7 @@ void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc, int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) { struct hclgevf_dev *hdev = (struct hclgevf_dev *)hw->hdev; + struct hclgevf_cmq_ring *csq = &hw->cmq.csq; struct hclgevf_desc *desc_to_use; bool complete = false; u32 timeout = 0; @@ -201,8 +232,17 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) spin_lock_bh(&hw->cmq.csq.lock); - if (num > hclgevf_ring_space(&hw->cmq.csq) || - test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) { + if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) { + spin_unlock_bh(&hw->cmq.csq.lock); + return -EBUSY; + } + + if (num > hclgevf_ring_space(&hw->cmq.csq)) { + /* If CMDQ ring is full, SW HEAD and HW HEAD may be different, + * need update the SW HEAD pointer csq->next_to_clean + */ + csq->next_to_clean = hclgevf_read_dev(hw, + HCLGEVF_NIC_CSQ_HEAD_REG); spin_unlock_bh(&hw->cmq.csq.lock); return -EBUSY; } @@ -251,11 +291,7 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) else retval = le16_to_cpu(desc[0].retval); - if ((enum hclgevf_cmd_return_status)retval == - HCLGEVF_CMD_EXEC_SUCCESS) - status = 0; - else - status = -EIO; + status = hclgevf_cmd_convert_err_code(retval); hw->cmq.last_status = (enum hclgevf_cmd_status)retval; ntc++; handle++; @@ -265,14 +301,13 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) } if (!complete) 
- status = -EAGAIN; + status = -EBADE; /* Clean the command send queue */ handle = hclgevf_cmd_csq_clean(hw); - if (handle != num) { + if (handle != num) dev_warn(&hdev->pdev->dev, "cleaned %d, need to clean %d\n", handle, num); - } spin_unlock_bh(&hw->cmq.csq.lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 47030b42341f..127a434a56f3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -46,9 +46,17 @@ struct hclgevf_cmq_ring { enum hclgevf_cmd_return_status { HCLGEVF_CMD_EXEC_SUCCESS = 0, - HCLGEVF_CMD_NO_AUTH = 1, - HCLGEVF_CMD_NOT_EXEC = 2, - HCLGEVF_CMD_QUEUE_FULL = 3, + HCLGEVF_CMD_NO_AUTH = 1, + HCLGEVF_CMD_NOT_SUPPORTED = 2, + HCLGEVF_CMD_QUEUE_FULL = 3, + HCLGEVF_CMD_NEXT_ERR = 4, + HCLGEVF_CMD_UNEXE_ERR = 5, + HCLGEVF_CMD_PARA_ERR = 6, + HCLGEVF_CMD_RESULT_ERR = 7, + HCLGEVF_CMD_TIMEOUT = 8, + HCLGEVF_CMD_HILINK_ERR = 9, + HCLGEVF_CMD_QUEUE_ILLEGAL = 10, + HCLGEVF_CMD_INVALID = 11, }; enum hclgevf_cmd_status { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5d53467ee2d2..a13a0e101c3b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -11,6 +11,8 @@ #define HCLGEVF_NAME "hclgevf" +#define HCLGEVF_RESET_MAX_FAIL_CNT 5 + static int hclgevf_reset_hdev(struct hclgevf_dev *hdev); static struct hnae3_ae_algo ae_algovf; @@ -83,8 +85,7 @@ static const u32 tqp_intr_reg_addr_list[] = {HCLGEVF_TQP_INTR_CTRL_REG, HCLGEVF_TQP_INTR_GL2_REG, HCLGEVF_TQP_INTR_RL_REG}; -static inline struct hclgevf_dev *hclgevf_ae_get_hdev( - struct hnae3_handle *handle) +static struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle) { if (!handle->client) return container_of(handle, struct hclgevf_dev, nic); @@ -232,7 +233,7 @@ static int hclgevf_get_tc_info(struct hclgevf_dev *hdev) int status; status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_TCINFO, 0, NULL, 0, - true, &resp_msg, sizeof(u8)); + true, &resp_msg, sizeof(resp_msg)); if (status) { dev_err(&hdev->pdev->dev, "VF request to get TC info from PF failed %d", @@ -321,7 +322,8 @@ static u16 hclgevf_get_qid_global(struct hnae3_handle *handle, u16 queue_id) memcpy(&msg_data[0], &queue_id, sizeof(queue_id)); ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QID_IN_PF, 0, msg_data, - 2, true, resp_data, 2); + sizeof(msg_data), true, resp_data, + sizeof(resp_data)); if (!ret) qid_in_pf = *(u16 *)resp_data; @@ -382,7 +384,7 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) struct hnae3_handle *nic = &hdev->nic; struct hnae3_knic_private_info *kinfo; u16 new_tqps = hdev->num_tqps; - int i; + unsigned int i; kinfo = &nic->kinfo; kinfo->num_tc = 0; @@ -418,7 +420,7 @@ static void hclgevf_request_link_info(struct hclgevf_dev *hdev) u8 resp_msg; status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_STATUS, 0, NULL, - 0, false, &resp_msg, sizeof(u8)); + 0, false, &resp_msg, sizeof(resp_msg)); if (status) dev_err(&hdev->pdev->dev, "VF failed to fetch link status(%d) from PF", status); @@ -453,11 +455,13 @@ static void hclgevf_update_link_mode(struct hclgevf_dev *hdev) u8 resp_msg; send_msg = HCLGEVF_ADVERTISING; - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, &send_msg, - sizeof(u8), false, &resp_msg, sizeof(u8)); + hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, + &send_msg, 
sizeof(send_msg), false, + &resp_msg, sizeof(resp_msg)); send_msg = HCLGEVF_SUPPORTED; - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, &send_msg, - sizeof(u8), false, &resp_msg, sizeof(u8)); + hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, + &send_msg, sizeof(send_msg), false, + &resp_msg, sizeof(resp_msg)); } static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) @@ -470,12 +474,6 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) nic->numa_node_mask = hdev->numa_node_mask; nic->flags |= HNAE3_SUPPORT_VF; - if (hdev->ae_dev->dev_type != HNAE3_DEV_KNIC) { - dev_err(&hdev->pdev->dev, "unsupported device type %d\n", - hdev->ae_dev->dev_type); - return -EINVAL; - } - ret = hclgevf_knic_setup(hdev); if (ret) dev_err(&hdev->pdev->dev, "VF knic setup failed %d\n", @@ -544,14 +542,16 @@ static int hclgevf_set_rss_algo_key(struct hclgevf_dev *hdev, const u8 hfunc, const u8 *key) { struct hclgevf_rss_config_cmd *req; + unsigned int key_offset = 0; struct hclgevf_desc desc; - int key_offset; + int key_counts; int key_size; int ret; + key_counts = HCLGEVF_RSS_KEY_SIZE; req = (struct hclgevf_rss_config_cmd *)desc.data; - for (key_offset = 0; key_offset < 3; key_offset++) { + while (key_counts) { hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_GENERIC_CONFIG, false); @@ -560,15 +560,12 @@ static int hclgevf_set_rss_algo_key(struct hclgevf_dev *hdev, req->hash_config |= (key_offset << HCLGEVF_RSS_HASH_KEY_OFFSET_B); - if (key_offset == 2) - key_size = - HCLGEVF_RSS_KEY_SIZE - HCLGEVF_RSS_HASH_KEY_NUM * 2; - else - key_size = HCLGEVF_RSS_HASH_KEY_NUM; - + key_size = min(HCLGEVF_RSS_HASH_KEY_NUM, key_counts); memcpy(req->hash_key, key + key_offset * HCLGEVF_RSS_HASH_KEY_NUM, key_size); + key_counts -= key_size; + key_offset++; ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -631,7 +628,7 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) struct hclgevf_desc desc; u16 roundup_size; int status; - int i; + unsigned int i; req = (struct hclgevf_rss_tc_mode_cmd *)desc.data; @@ -997,6 +994,8 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en, u8 type; req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; + type = en ? HCLGE_MBX_MAP_RING_TO_VECTOR : + HCLGE_MBX_UNMAP_RING_TO_VECTOR; for (node = ring_chain; node; node = node->next) { int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + @@ -1006,9 +1005,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en, hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false); - type = en ? 
- HCLGE_MBX_MAP_RING_TO_VECTOR :
- HCLGE_MBX_UNMAP_RING_TO_VECTOR;
 req->msg[0] = type;
 req->msg[1] = vector_id;
 }
@@ -1134,7 +1130,7 @@ static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc)
 return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc);
 }

-static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
+static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
 int stream_id, bool enable)
 {
 struct hclgevf_cfg_com_tqp_queue_cmd *req;
@@ -1147,7 +1143,8 @@ static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
 false);
 req->tqp_id = cpu_to_le16(tqp_id & HCLGEVF_RING_ID_MASK);
 req->stream_id = cpu_to_le16(stream_id);
- req->enable |= enable << HCLGEVF_TQP_ENABLE_B;
+ if (enable)
+ req->enable |= 1U << HCLGEVF_TQP_ENABLE_B;

 status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
 if (status)
@@ -1193,7 +1190,7 @@ static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
 HCLGE_MBX_MAC_VLAN_UC_MODIFY;

 status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
- subcode, msg_data, ETH_ALEN * 2,
+ subcode, msg_data, sizeof(msg_data),
 true, NULL, 0);
 if (!status)
 ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
@@ -1248,19 +1245,61 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
 #define HCLGEVF_VLAN_MBX_MSG_LEN 5
 struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 u8 msg_data[HCLGEVF_VLAN_MBX_MSG_LEN];
+ int ret;

- if (vlan_id > 4095)
+ if (vlan_id > HCLGEVF_MAX_VLAN_ID)
 return -EINVAL;

 if (proto != htons(ETH_P_8021Q))
 return -EPROTONOSUPPORT;

+ /* When the device is resetting, the firmware cannot handle the
+ * mailbox. Just record the vlan id and remove it once the
+ * reset has finished.
+ */
+ if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) && is_kill) {
+ set_bit(vlan_id, hdev->vlan_del_fail_bmap);
+ return -EBUSY;
+ }
+
 msg_data[0] = is_kill;
 memcpy(&msg_data[1], &vlan_id, sizeof(vlan_id));
 memcpy(&msg_data[3], &proto, sizeof(proto));
- return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
- HCLGE_MBX_VLAN_FILTER, msg_data,
- HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+ ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+ HCLGE_MBX_VLAN_FILTER, msg_data,
+ HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+
+ /* When removing the hw vlan filter fails, record the vlan id
+ * and try to remove it from hw later, to stay consistent
+ * with the stack.
+ */ + if (is_kill && ret) + set_bit(vlan_id, hdev->vlan_del_fail_bmap); + + return ret; +} + +static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_MAX_SYNC_COUNT 60 + struct hnae3_handle *handle = &hdev->nic; + int ret, sync_cnt = 0; + u16 vlan_id; + + vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); + while (vlan_id != VLAN_N_VID) { + ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q), + vlan_id, true); + if (ret) + return; + + clear_bit(vlan_id, hdev->vlan_del_fail_bmap); + sync_cnt++; + if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT) + return; + + vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); + } } static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -1280,7 +1319,7 @@ static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) u8 msg_data[2]; int ret; - memcpy(&msg_data[0], &queue_id, sizeof(queue_id)); + memcpy(msg_data, &queue_id, sizeof(queue_id)); /* disable vf queue before send queue reset msg to PF */ ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); @@ -1288,7 +1327,7 @@ static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) return ret; return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, - 2, true, NULL, 0); + sizeof(msg_data), true, NULL, 0); } static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu) @@ -1306,6 +1345,10 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev, struct hnae3_handle *handle = &hdev->nic; int ret; + if (!test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state) || + !client) + return 0; + if (!client->ops->reset_notify) return -EOPNOTSUPP; @@ -1410,6 +1453,8 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev) static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) { +#define HCLGEVF_RESET_SYNC_TIME 100 + int ret = 0; switch (hdev->reset_type) { @@ -1427,13 +1472,34 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) } set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); - + /* inform hardware that preparatory work is done */ + msleep(HCLGEVF_RESET_SYNC_TIME); + hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG, + HCLGEVF_NIC_CMQ_ENABLE); dev_info(&hdev->pdev->dev, "prepare reset(%d) wait done, ret:%d\n", hdev->reset_type, ret); return ret; } +static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev) +{ + hdev->rst_stats.rst_fail_cnt++; + dev_err(&hdev->pdev->dev, "failed to reset VF(%d)\n", + hdev->rst_stats.rst_fail_cnt); + + if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT) + set_bit(hdev->reset_type, &hdev->reset_pending); + + if (hclgevf_is_reset_pending(hdev)) { + set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); + hclgevf_reset_task_schedule(hdev); + } else { + hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG, + HCLGEVF_NIC_CMQ_ENABLE); + } +} + static int hclgevf_reset(struct hclgevf_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); @@ -1490,19 +1556,13 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) hdev->last_reset_time = jiffies; ae_dev->reset_type = HNAE3_NONE_RESET; hdev->rst_stats.rst_done_cnt++; + hdev->rst_stats.rst_fail_cnt = 0; return ret; err_reset_lock: rtnl_unlock(); err_reset: - /* When VF reset failed, only the higher level reset asserted by PF - * can restore it, so re-initialize the command queue to receive - * this higher reset event. 
- */ - hclgevf_cmd_init(hdev); - dev_err(&hdev->pdev->dev, "failed to reset VF\n"); - if (hclgevf_is_reset_pending(hdev)) - hclgevf_reset_task_schedule(hdev); + hclgevf_reset_err_handle(hdev); return ret; } @@ -1612,7 +1672,8 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) { + if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) && + !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) { set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); schedule_work(&hdev->rst_service_task); } @@ -1648,7 +1709,8 @@ static void hclgevf_service_timer(struct timer_list *t) { struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer); - mod_timer(&hdev->service_timer, jiffies + 5 * HZ); + mod_timer(&hdev->service_timer, jiffies + + HCLGEVF_GENERAL_TASK_INTERVAL * HZ); hdev->stats_timer++; hclgevf_task_schedule(hdev); @@ -1668,9 +1730,9 @@ static void hclgevf_reset_service_task(struct work_struct *work) if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { /* PF has initmated that it is about to reset the hardware. - * We now have to poll & check if harware has actually completed - * the reset sequence. On hardware reset completion, VF needs to - * reset the client and ae device. + * We now have to poll & check if hardware has actually + * completed the reset sequence. On hardware reset completion, + * VF needs to reset the client and ae device. */ hdev->reset_attempts = 0; @@ -1686,7 +1748,7 @@ static void hclgevf_reset_service_task(struct work_struct *work) } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state)) { /* we could be here when either of below happens: - * 1. reset was initiated due to watchdog timeout due to + * 1. reset was initiated due to watchdog timeout caused by * a. IMP was earlier reset and our TX got choked down and * which resulted in watchdog reacting and inducing VF * reset. This also means our cmdq would be unreliable. 
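The reworked VF reset error path above replaces the old "re-init the command queue and hope" fallback: hclgevf_reset_err_handle() counts consecutive failures in rst_stats.rst_fail_cnt, re-queues the failed reset type only while the count stays below HCLGEVF_RESET_MAX_FAIL_CNT, and otherwise re-enables the command queue so that the higher-level reset asserted by the PF can still reach the function. A minimal, self-contained sketch of that retry policy follows; apart from the HCLGEVF_RESET_MAX_FAIL_CNT cap and the rst_fail_cnt counter taken from the patch, every name here (struct rst_ctx, reset_once, the messages) is a simplified stand-in, not driver code.

#include <stdbool.h>
#include <stdio.h>

#define HCLGEVF_RESET_MAX_FAIL_CNT 5 /* retry cap, as in the patch */

struct rst_ctx {
	unsigned int rst_fail_cnt;   /* consecutive failed VF resets */
	unsigned long reset_pending; /* bitmap of reset types to retry */
};

/* Stand-in for hclgevf_reset(); always fails so the cap is reached. */
static bool reset_once(struct rst_ctx *ctx)
{
	(void)ctx;
	return false;
}

static void reset_err_handle(struct rst_ctx *ctx, unsigned int type)
{
	ctx->rst_fail_cnt++;

	/* Re-queue the same reset type only while under the cap. */
	if (ctx->rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
		ctx->reset_pending |= 1UL << type;

	if (ctx->reset_pending)
		printf("attempt %u failed, rescheduling reset task\n",
		       ctx->rst_fail_cnt);
	else
		printf("attempt %u failed, giving up; re-enable cmdq\n",
		       ctx->rst_fail_cnt);
}

int main(void)
{
	struct rst_ctx ctx = { 0, 0 };
	unsigned int type = 1; /* some VF-level reset type */

	/* Drive the loop the way the reset service task would. */
	while (!reset_once(&ctx)) {
		reset_err_handle(&ctx, type);
		if (!(ctx.reset_pending & (1UL << type)))
			break; /* cap reached: nothing was re-queued */
		ctx.reset_pending &= ~(1UL << type); /* task consumes it */
	}
	return 0;
}

The property the patch encodes is that a VF which keeps failing must eventually stop retrying on its own and leave its command queue enabled, since from that point only a PF-initiated reset can recover it.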
@@ -1748,7 +1810,8 @@ static void hclgevf_keep_alive_timer(struct timer_list *t) struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer); schedule_work(&hdev->keep_alive_task); - mod_timer(&hdev->keep_alive_timer, jiffies + 2 * HZ); + mod_timer(&hdev->keep_alive_timer, jiffies + + HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); } static void hclgevf_keep_alive_task(struct work_struct *work) @@ -1763,7 +1826,7 @@ static void hclgevf_keep_alive_task(struct work_struct *work) return; ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_KEEP_ALIVE, 0, NULL, - 0, false, &respmsg, sizeof(u8)); + 0, false, &respmsg, sizeof(respmsg)); if (ret) dev_err(&hdev->pdev->dev, "VF sends keep alive cmd failed(=%d)\n", ret); @@ -1789,6 +1852,8 @@ static void hclgevf_service_task(struct work_struct *work) hclgevf_update_link_mode(hdev); + hclgevf_sync_vlan_filter(hdev); + hclgevf_deferred_task_schedule(hdev); clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); @@ -1995,7 +2060,7 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) } - /* Initialize RSS indirect table for each vport */ + /* Initialize RSS indirect table */ for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) rss_cfg->rss_indirection_tbl[i] = i % hdev->rss_size_max; @@ -2008,9 +2073,6 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) { - /* other vlan config(like, VLAN TX/RX offload) would also be added - * here later - */ return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, false); } @@ -2032,7 +2094,6 @@ static int hclgevf_ae_start(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - /* reset tqp stats */ hclgevf_reset_tqp_stats(handle); hclgevf_request_link_info(hdev); @@ -2056,7 +2117,6 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle) if (hclgevf_reset_tqp(handle, i)) break; - /* reset tqp stats */ hclgevf_reset_tqp_stats(handle); hclgevf_update_link_status(hdev, 0); } @@ -2080,7 +2140,8 @@ static int hclgevf_client_start(struct hnae3_handle *handle) if (ret) return ret; - mod_timer(&hdev->keep_alive_timer, jiffies + 2 * HZ); + mod_timer(&hdev->keep_alive_timer, jiffies + + HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); return 0; } @@ -2123,6 +2184,7 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev) static void hclgevf_state_uninit(struct hclgevf_dev *hdev) { set_bit(HCLGEVF_STATE_DOWN, &hdev->state); + set_bit(HCLGEVF_STATE_REMOVING, &hdev->state); if (hdev->keep_alive_timer.function) del_timer_sync(&hdev->keep_alive_timer); @@ -2249,49 +2311,68 @@ static void hclgevf_info_show(struct hclgevf_dev *hdev) dev_info(dev, "VF info end.\n"); } -static int hclgevf_init_client_instance(struct hnae3_client *client, - struct hnae3_ae_dev *ae_dev) +static int hclgevf_init_nic_client_instance(struct hnae3_ae_dev *ae_dev, + struct hnae3_client *client) { struct hclgevf_dev *hdev = ae_dev->priv; int ret; - switch (client->type) { - case HNAE3_CLIENT_KNIC: - hdev->nic_client = client; - hdev->nic.client = client; + ret = client->ops->init_instance(&hdev->nic); + if (ret) + return ret; - ret = client->ops->init_instance(&hdev->nic); - if (ret) - goto clear_nic; + set_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); + hnae3_set_client_init_flag(client, ae_dev, 1); - hnae3_set_client_init_flag(client, ae_dev, 1); + if (netif_msg_drv(&hdev->nic)) + hclgevf_info_show(hdev); - if (netif_msg_drv(&hdev->nic)) - hclgevf_info_show(hdev); + return 0; +} - if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) { - struct hnae3_client 
*rc = hdev->roce_client; +static int hclgevf_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, + struct hnae3_client *client) +{ + struct hclgevf_dev *hdev = ae_dev->priv; + int ret; - ret = hclgevf_init_roce_base_info(hdev); - if (ret) - goto clear_roce; - ret = rc->ops->init_instance(&hdev->roce); - if (ret) - goto clear_roce; + if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client || + !hdev->nic_client) + return 0; - hnae3_set_client_init_flag(hdev->roce_client, ae_dev, - 1); - } - break; - case HNAE3_CLIENT_UNIC: + ret = hclgevf_init_roce_base_info(hdev); + if (ret) + return ret; + + ret = client->ops->init_instance(&hdev->roce); + if (ret) + return ret; + + hnae3_set_client_init_flag(client, ae_dev, 1); + + return 0; +} + +static int hclgevf_init_client_instance(struct hnae3_client *client, + struct hnae3_ae_dev *ae_dev) +{ + struct hclgevf_dev *hdev = ae_dev->priv; + int ret; + + switch (client->type) { + case HNAE3_CLIENT_KNIC: hdev->nic_client = client; hdev->nic.client = client; - ret = client->ops->init_instance(&hdev->nic); + ret = hclgevf_init_nic_client_instance(ae_dev, client); if (ret) goto clear_nic; - hnae3_set_client_init_flag(client, ae_dev, 1); + ret = hclgevf_init_roce_client_instance(ae_dev, + hdev->roce_client); + if (ret) + goto clear_roce; + break; case HNAE3_CLIENT_ROCE: if (hnae3_dev_roce_supported(hdev)) { @@ -2299,17 +2380,10 @@ static int hclgevf_init_client_instance(struct hnae3_client *client, hdev->roce.client = client; } - if (hdev->roce_client && hdev->nic_client) { - ret = hclgevf_init_roce_base_info(hdev); - if (ret) - goto clear_roce; - - ret = client->ops->init_instance(&hdev->roce); - if (ret) - goto clear_roce; - } + ret = hclgevf_init_roce_client_instance(ae_dev, client); + if (ret) + goto clear_roce; - hnae3_set_client_init_flag(client, ae_dev, 1); break; default: return -EINVAL; @@ -2342,6 +2416,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, /* un-init nic/unic, if this was not called by roce client */ if (client->ops->uninit_instance && hdev->nic_client && client->type != HNAE3_CLIENT_ROCE) { + clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); + client->ops->uninit_instance(&hdev->nic, 0); hdev->nic_client = NULL; hdev->nic.client = NULL; @@ -2512,6 +2588,12 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) return ret; } + if (pdev->revision >= 0x21) { + ret = hclgevf_set_promisc_mode(hdev, true); + if (ret) + return ret; + } + dev_info(&hdev->pdev->dev, "Reset done\n"); return 0; @@ -2591,9 +2673,11 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) * firmware makes sure broadcast packets can be accepted. * For revision 0x21, default to enable broadcast promisc mode. 
*/ - ret = hclgevf_set_promisc_mode(hdev, true); - if (ret) - goto err_config; + if (pdev->revision >= 0x21) { + ret = hclgevf_set_promisc_mode(hdev, true); + if (ret) + goto err_config; + } /* Initialize RSS for this VF */ ret = hclgevf_rss_init_hw(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index cc52f54f8c08..5a9e30998a8f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -4,6 +4,7 @@ #ifndef __HCLGEVF_MAIN_H #define __HCLGEVF_MAIN_H #include <linux/fs.h> +#include <linux/if_vlan.h> #include <linux/types.h> #include "hclge_mbx.h" #include "hclgevf_cmd.h" @@ -12,9 +13,12 @@ #define HCLGEVF_MOD_VERSION "1.0" #define HCLGEVF_DRIVER_NAME "hclgevf" +#define HCLGEVF_MAX_VLAN_ID 4095 #define HCLGEVF_MISC_VECTOR_NUM 0 #define HCLGEVF_INVALID_VPORT 0xffff +#define HCLGEVF_GENERAL_TASK_INTERVAL 5 +#define HCLGEVF_KEEP_ALIVE_TASK_INTERVAL 2 /* This number in actual depends upon the total number of VFs * created by physical function. But the maximum number of @@ -130,6 +134,8 @@ enum hclgevf_states { HCLGEVF_STATE_DOWN, HCLGEVF_STATE_DISABLED, HCLGEVF_STATE_IRQ_INITED, + HCLGEVF_STATE_REMOVING, + HCLGEVF_STATE_NIC_REGISTERED, /* task states */ HCLGEVF_STATE_SERVICE_SCHED, HCLGEVF_STATE_RST_SERVICE_SCHED, @@ -220,6 +226,7 @@ struct hclgevf_rst_stats { u32 vf_rst_cnt; /* the number of VF reset */ u32 rst_done_cnt; /* the number of reset completed */ u32 hw_rst_done_cnt; /* the number of HW reset completed */ + u32 rst_fail_cnt; /* the number of VF reset fail */ }; struct hclgevf_dev { @@ -265,6 +272,8 @@ struct hclgevf_dev { u16 *vector_status; int *vector_irq; + unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; + bool mbx_event_pending; struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 30f2e9352cf3..f60b80bd605e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -102,7 +102,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode, ~HCLGE_MBX_NEED_RESP_BIT; req->msg[0] = code; req->msg[1] = subcode; - memcpy(&req->msg[2], msg_data, msg_len); + if (msg_data) + memcpy(&req->msg[2], msg_data, msg_len); /* synchronous send */ if (need_resp) { diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile index 99de5b6607d5..fe88ab88cacc 100644 --- a/drivers/net/ethernet/huawei/hinic/Makefile +++ b/drivers/net/ethernet/huawei/hinic/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_HINIC) += hinic.o hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \ hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \ hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \ - hinic_common.o + hinic_common.o hinic_ethtool.o diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h index 353276fdcaed..a209b14160cc 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h @@ -22,6 +22,7 @@ enum hinic_flags { HINIC_LINK_UP = BIT(0), HINIC_INTF_UP = BIT(1), + HINIC_RSS_ENABLE = BIT(2), }; struct hinic_rx_mode_work { @@ -29,6 +30,23 @@ struct hinic_rx_mode_work { 
u32 rx_mode; }; +struct hinic_rss_type { + u8 tcp_ipv6_ext; + u8 ipv6_ext; + u8 tcp_ipv6; + u8 ipv6; + u8 tcp_ipv4; + u8 ipv4; + u8 udp_ipv6; + u8 udp_ipv4; +}; + +enum hinic_rss_hash_type { + HINIC_RSS_HASH_ENGINE_TYPE_XOR, + HINIC_RSS_HASH_ENGINE_TYPE_TOEP, + HINIC_RSS_HASH_ENGINE_TYPE_MAX, +}; + struct hinic_dev { struct net_device *netdev; struct hinic_hwdev *hwdev; @@ -36,6 +54,8 @@ struct hinic_dev { u32 msg_enable; unsigned int tx_weight; unsigned int rx_weight; + u16 num_qps; + u16 max_qps; unsigned int flags; @@ -50,6 +70,14 @@ struct hinic_dev { struct hinic_txq_stats tx_stats; struct hinic_rxq_stats rx_stats; + + u8 rss_tmpl_idx; + u8 rss_hash_engine; + u16 num_rss; + u16 rss_limit; + struct hinic_rss_type rss_type; + u8 *rss_hkey_user; + s32 *rss_indir_user; }; #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c new file mode 100644 index 000000000000..60ec48fe4144 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c @@ -0,0 +1,762 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Huawei HiNIC PCI Express Linux driver + * Copyright(c) 2017 Huawei Technologies Co., Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/etherdevice.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/ethtool.h> +#include <linux/vmalloc.h> + +#include "hinic_hw_qp.h" +#include "hinic_hw_dev.h" +#include "hinic_port.h" +#include "hinic_tx.h" +#include "hinic_rx.h" +#include "hinic_dev.h" + +static void set_link_speed(struct ethtool_link_ksettings *link_ksettings, + enum hinic_speed speed) +{ + switch (speed) { + case HINIC_SPEED_10MB_LINK: + link_ksettings->base.speed = SPEED_10; + break; + + case HINIC_SPEED_100MB_LINK: + link_ksettings->base.speed = SPEED_100; + break; + + case HINIC_SPEED_1000MB_LINK: + link_ksettings->base.speed = SPEED_1000; + break; + + case HINIC_SPEED_10GB_LINK: + link_ksettings->base.speed = SPEED_10000; + break; + + case HINIC_SPEED_25GB_LINK: + link_ksettings->base.speed = SPEED_25000; + break; + + case HINIC_SPEED_40GB_LINK: + link_ksettings->base.speed = SPEED_40000; + break; + + case HINIC_SPEED_100GB_LINK: + link_ksettings->base.speed = SPEED_100000; + break; + + default: + link_ksettings->base.speed = SPEED_UNKNOWN; + break; + } +} + +static int hinic_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings + *link_ksettings) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + enum hinic_port_link_state link_state; + struct hinic_port_cap port_cap; + int err; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + + link_ksettings->base.speed = SPEED_UNKNOWN; + link_ksettings->base.autoneg = AUTONEG_DISABLE; + link_ksettings->base.duplex = DUPLEX_UNKNOWN; + + err = hinic_port_get_cap(nic_dev, &port_cap); + if (err) + return err; + + 
err = hinic_port_link_state(nic_dev, &link_state); + if (err) + return err; + + if (link_state != HINIC_LINK_STATE_UP) + return err; + + set_link_speed(link_ksettings, port_cap.speed); + + if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED)) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); + + if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE) + link_ksettings->base.autoneg = AUTONEG_ENABLE; + + link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ? + DUPLEX_FULL : DUPLEX_HALF; + return 0; +} + +static void hinic_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + u8 mgmt_ver[HINIC_MGMT_VERSION_MAX_LEN] = {0}; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + int err; + + strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver)); + strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info)); + + err = hinic_get_mgmt_version(nic_dev, mgmt_ver); + if (err) + return; + + snprintf(info->fw_version, sizeof(info->fw_version), "%s", mgmt_ver); +} + +static void hinic_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + ring->rx_max_pending = HINIC_RQ_DEPTH; + ring->tx_max_pending = HINIC_SQ_DEPTH; + ring->rx_pending = HINIC_RQ_DEPTH; + ring->tx_pending = HINIC_SQ_DEPTH; +} + +static void hinic_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + struct hinic_hwdev *hwdev = nic_dev->hwdev; + + channels->max_rx = hwdev->nic_cap.max_qps; + channels->max_tx = hwdev->nic_cap.max_qps; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = hinic_hwdev_num_qps(hwdev); + channels->tx_count = hinic_hwdev_num_qps(hwdev); + channels->other_count = 0; + channels->combined_count = 0; +} + +static int hinic_get_rss_hash_opts(struct hinic_dev *nic_dev, + struct ethtool_rxnfc *cmd) +{ + struct hinic_rss_type rss_type = { 0 }; + int err; + + cmd->data = 0; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) + return 0; + + err = hinic_get_rss_type(nic_dev, nic_dev->rss_tmpl_idx, + &rss_type); + if (err) + return err; + + cmd->data = RXH_IP_SRC | RXH_IP_DST; + switch (cmd->flow_type) { + case TCP_V4_FLOW: + if (rss_type.tcp_ipv4) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case TCP_V6_FLOW: + if (rss_type.tcp_ipv6) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case UDP_V4_FLOW: + if (rss_type.udp_ipv4) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case UDP_V6_FLOW: + if (rss_type.udp_ipv6) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case IPV4_FLOW: + case IPV6_FLOW: + break; + default: + cmd->data = 0; + return -EINVAL; + } + + return 0; +} + +static int set_l4_rss_hash_ops(struct ethtool_rxnfc *cmd, + struct hinic_rss_type *rss_type) +{ + u8 rss_l4_en = 0; + + switch (cmd->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_l4_en = 0; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_l4_en = 1; + break; + default: + return -EINVAL; + } + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + rss_type->tcp_ipv4 = rss_l4_en; + break; + case TCP_V6_FLOW: + rss_type->tcp_ipv6 = rss_l4_en; + break; + case UDP_V4_FLOW: + rss_type->udp_ipv4 = rss_l4_en; + break; + case UDP_V6_FLOW: + rss_type->udp_ipv6 = rss_l4_en; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int hinic_set_rss_hash_opts(struct hinic_dev *nic_dev, + struct ethtool_rxnfc *cmd) +{ + struct 
hinic_rss_type *rss_type = &nic_dev->rss_type; + int err; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) { + cmd->data = 0; + return -EOPNOTSUPP; + } + + /* RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (cmd->data & ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | + RXH_L4_B_2_3)) + return -EINVAL; + + /* We need at least the IP SRC and DEST fields for hashing */ + if (!(cmd->data & RXH_IP_SRC) || !(cmd->data & RXH_IP_DST)) + return -EINVAL; + + err = hinic_get_rss_type(nic_dev, + nic_dev->rss_tmpl_idx, rss_type); + if (err) + return -EFAULT; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V4_FLOW: + case UDP_V6_FLOW: + err = set_l4_rss_hash_ops(cmd, rss_type); + if (err) + return err; + break; + case IPV4_FLOW: + rss_type->ipv4 = 1; + break; + case IPV6_FLOW: + rss_type->ipv6 = 1; + break; + default: + return -EINVAL; + } + + err = hinic_set_rss_type(nic_dev, nic_dev->rss_tmpl_idx, + *rss_type); + if (err) + return -EFAULT; + + return 0; +} + +static int __set_rss_rxfh(struct net_device *netdev, + const u32 *indir, const u8 *key) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err; + + if (indir) { + if (!nic_dev->rss_indir_user) { + nic_dev->rss_indir_user = + kzalloc(sizeof(u32) * HINIC_RSS_INDIR_SIZE, + GFP_KERNEL); + if (!nic_dev->rss_indir_user) + return -ENOMEM; + } + + memcpy(nic_dev->rss_indir_user, indir, + sizeof(u32) * HINIC_RSS_INDIR_SIZE); + + err = hinic_rss_set_indir_tbl(nic_dev, + nic_dev->rss_tmpl_idx, indir); + if (err) + return -EFAULT; + } + + if (key) { + if (!nic_dev->rss_hkey_user) { + nic_dev->rss_hkey_user = + kzalloc(HINIC_RSS_KEY_SIZE * 2, GFP_KERNEL); + + if (!nic_dev->rss_hkey_user) + return -ENOMEM; + } + + memcpy(nic_dev->rss_hkey_user, key, HINIC_RSS_KEY_SIZE); + + err = hinic_rss_set_template_tbl(nic_dev, + nic_dev->rss_tmpl_idx, key); + if (err) + return -EFAULT; + } + + return 0; +} + +static int hinic_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = nic_dev->num_qps; + break; + case ETHTOOL_GRXFH: + err = hinic_get_rss_hash_opts(nic_dev, cmd); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int hinic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + err = hinic_set_rss_hash_opts(nic_dev, cmd); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int hinic_get_rxfh(struct net_device *netdev, + u32 *indir, u8 *key, u8 *hfunc) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + u8 hash_engine_type = 0; + int err = 0; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) + return -EOPNOTSUPP; + + if (hfunc) { + err = hinic_rss_get_hash_engine(nic_dev, + nic_dev->rss_tmpl_idx, + &hash_engine_type); + if (err) + return -EFAULT; + + *hfunc = hash_engine_type ? 
ETH_RSS_HASH_TOP : ETH_RSS_HASH_XOR; + } + + if (indir) { + err = hinic_rss_get_indir_tbl(nic_dev, + nic_dev->rss_tmpl_idx, indir); + if (err) + return -EFAULT; + } + + if (key) + err = hinic_rss_get_template_tbl(nic_dev, + nic_dev->rss_tmpl_idx, key); + + return err; +} + +static int hinic_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err = 0; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) + return -EOPNOTSUPP; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE) { + if (hfunc != ETH_RSS_HASH_TOP && hfunc != ETH_RSS_HASH_XOR) + return -EOPNOTSUPP; + + nic_dev->rss_hash_engine = (hfunc == ETH_RSS_HASH_XOR) ? + HINIC_RSS_HASH_ENGINE_TYPE_XOR : + HINIC_RSS_HASH_ENGINE_TYPE_TOEP; + err = hinic_rss_set_hash_engine + (nic_dev, nic_dev->rss_tmpl_idx, + nic_dev->rss_hash_engine); + if (err) + return -EFAULT; + } + + err = __set_rss_rxfh(netdev, indir, key); + + return err; +} + +static u32 hinic_get_rxfh_key_size(struct net_device *netdev) +{ + return HINIC_RSS_KEY_SIZE; +} + +static u32 hinic_get_rxfh_indir_size(struct net_device *netdev) +{ + return HINIC_RSS_INDIR_SIZE; +} + +#define ARRAY_LEN(arr) ((int)((int)sizeof(arr) / (int)sizeof(arr[0]))) + +#define HINIC_FUNC_STAT(_stat_item) { \ + .name = #_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_vport_stats, _stat_item), \ + .offset = offsetof(struct hinic_vport_stats, _stat_item) \ +} + +static struct hinic_stats hinic_function_stats[] = { + HINIC_FUNC_STAT(tx_unicast_pkts_vport), + HINIC_FUNC_STAT(tx_unicast_bytes_vport), + HINIC_FUNC_STAT(tx_multicast_pkts_vport), + HINIC_FUNC_STAT(tx_multicast_bytes_vport), + HINIC_FUNC_STAT(tx_broadcast_pkts_vport), + HINIC_FUNC_STAT(tx_broadcast_bytes_vport), + + HINIC_FUNC_STAT(rx_unicast_pkts_vport), + HINIC_FUNC_STAT(rx_unicast_bytes_vport), + HINIC_FUNC_STAT(rx_multicast_pkts_vport), + HINIC_FUNC_STAT(rx_multicast_bytes_vport), + HINIC_FUNC_STAT(rx_broadcast_pkts_vport), + HINIC_FUNC_STAT(rx_broadcast_bytes_vport), + + HINIC_FUNC_STAT(tx_discard_vport), + HINIC_FUNC_STAT(rx_discard_vport), + HINIC_FUNC_STAT(tx_err_vport), + HINIC_FUNC_STAT(rx_err_vport), +}; + +#define HINIC_PORT_STAT(_stat_item) { \ + .name = #_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_phy_port_stats, _stat_item), \ + .offset = offsetof(struct hinic_phy_port_stats, _stat_item) \ +} + +static struct hinic_stats hinic_port_stats[] = { + HINIC_PORT_STAT(mac_rx_total_pkt_num), + HINIC_PORT_STAT(mac_rx_total_oct_num), + HINIC_PORT_STAT(mac_rx_bad_pkt_num), + HINIC_PORT_STAT(mac_rx_bad_oct_num), + HINIC_PORT_STAT(mac_rx_good_pkt_num), + HINIC_PORT_STAT(mac_rx_good_oct_num), + HINIC_PORT_STAT(mac_rx_uni_pkt_num), + HINIC_PORT_STAT(mac_rx_multi_pkt_num), + HINIC_PORT_STAT(mac_rx_broad_pkt_num), + HINIC_PORT_STAT(mac_tx_total_pkt_num), + HINIC_PORT_STAT(mac_tx_total_oct_num), + HINIC_PORT_STAT(mac_tx_bad_pkt_num), + HINIC_PORT_STAT(mac_tx_bad_oct_num), + HINIC_PORT_STAT(mac_tx_good_pkt_num), + HINIC_PORT_STAT(mac_tx_good_oct_num), + HINIC_PORT_STAT(mac_tx_uni_pkt_num), + HINIC_PORT_STAT(mac_tx_multi_pkt_num), + HINIC_PORT_STAT(mac_tx_broad_pkt_num), + HINIC_PORT_STAT(mac_rx_fragment_pkt_num), + HINIC_PORT_STAT(mac_rx_undersize_pkt_num), + HINIC_PORT_STAT(mac_rx_undermin_pkt_num), + HINIC_PORT_STAT(mac_rx_64_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_65_127_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_128_255_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_256_511_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_512_1023_oct_pkt_num), + 
HINIC_PORT_STAT(mac_rx_1024_1518_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_1519_2047_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_2048_4095_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_4096_8191_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_8192_9216_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_9217_12287_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_12288_16383_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_1519_max_good_pkt_num), + HINIC_PORT_STAT(mac_rx_1519_max_bad_pkt_num), + HINIC_PORT_STAT(mac_rx_oversize_pkt_num), + HINIC_PORT_STAT(mac_rx_jabber_pkt_num), + HINIC_PORT_STAT(mac_rx_pause_num), + HINIC_PORT_STAT(mac_rx_pfc_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri0_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri1_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri2_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri3_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri4_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri5_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri6_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri7_pkt_num), + HINIC_PORT_STAT(mac_rx_control_pkt_num), + HINIC_PORT_STAT(mac_rx_sym_err_pkt_num), + HINIC_PORT_STAT(mac_rx_fcs_err_pkt_num), + HINIC_PORT_STAT(mac_rx_send_app_good_pkt_num), + HINIC_PORT_STAT(mac_rx_send_app_bad_pkt_num), + HINIC_PORT_STAT(mac_tx_fragment_pkt_num), + HINIC_PORT_STAT(mac_tx_undersize_pkt_num), + HINIC_PORT_STAT(mac_tx_undermin_pkt_num), + HINIC_PORT_STAT(mac_tx_64_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_65_127_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_128_255_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_256_511_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_512_1023_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_1024_1518_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_1519_2047_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_2048_4095_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_4096_8191_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_8192_9216_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_9217_12287_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_12288_16383_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_1519_max_good_pkt_num), + HINIC_PORT_STAT(mac_tx_1519_max_bad_pkt_num), + HINIC_PORT_STAT(mac_tx_oversize_pkt_num), + HINIC_PORT_STAT(mac_tx_jabber_pkt_num), + HINIC_PORT_STAT(mac_tx_pause_num), + HINIC_PORT_STAT(mac_tx_pfc_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri0_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri1_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri2_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri3_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri4_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri5_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri6_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri7_pkt_num), + HINIC_PORT_STAT(mac_tx_control_pkt_num), + HINIC_PORT_STAT(mac_tx_err_all_pkt_num), + HINIC_PORT_STAT(mac_tx_from_app_good_pkt_num), + HINIC_PORT_STAT(mac_tx_from_app_bad_pkt_num), +}; + +#define HINIC_TXQ_STAT(_stat_item) { \ + .name = "txq%d_"#_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_txq_stats, _stat_item), \ + .offset = offsetof(struct hinic_txq_stats, _stat_item) \ +} + +static struct hinic_stats hinic_tx_queue_stats[] = { + HINIC_TXQ_STAT(pkts), + HINIC_TXQ_STAT(bytes), + HINIC_TXQ_STAT(tx_busy), + HINIC_TXQ_STAT(tx_wake), + HINIC_TXQ_STAT(tx_dropped), + HINIC_TXQ_STAT(big_frags_pkts), +}; + +#define HINIC_RXQ_STAT(_stat_item) { \ + .name = "rxq%d_"#_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_rxq_stats, _stat_item), \ + .offset = offsetof(struct hinic_rxq_stats, _stat_item) \ +} + +static struct hinic_stats hinic_rx_queue_stats[] = { + HINIC_RXQ_STAT(pkts), + HINIC_RXQ_STAT(bytes), + HINIC_RXQ_STAT(errors), + HINIC_RXQ_STAT(csum_errors), + HINIC_RXQ_STAT(other_errors), +}; + +static void get_drv_queue_stats(struct 
hinic_dev *nic_dev, u64 *data) +{ + struct hinic_txq_stats txq_stats; + struct hinic_rxq_stats rxq_stats; + u16 i = 0, j = 0, qid = 0; + char *p; + + for (qid = 0; qid < nic_dev->num_qps; qid++) { + if (!nic_dev->txqs) + break; + + hinic_txq_get_stats(&nic_dev->txqs[qid], &txq_stats); + for (j = 0; j < ARRAY_LEN(hinic_tx_queue_stats); j++, i++) { + p = (char *)&txq_stats + + hinic_tx_queue_stats[j].offset; + data[i] = (hinic_tx_queue_stats[j].size == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + } + + for (qid = 0; qid < nic_dev->num_qps; qid++) { + if (!nic_dev->rxqs) + break; + + hinic_rxq_get_stats(&nic_dev->rxqs[qid], &rxq_stats); + for (j = 0; j < ARRAY_LEN(hinic_rx_queue_stats); j++, i++) { + p = (char *)&rxq_stats + + hinic_rx_queue_stats[j].offset; + data[i] = (hinic_rx_queue_stats[j].size == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + } +} + +static void hinic_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + struct hinic_vport_stats vport_stats = {0}; + struct hinic_phy_port_stats *port_stats; + u16 i = 0, j = 0; + char *p; + int err; + + err = hinic_get_vport_stats(nic_dev, &vport_stats); + if (err) + netif_err(nic_dev, drv, netdev, + "Failed to get vport stats from firmware\n"); + + for (j = 0; j < ARRAY_LEN(hinic_function_stats); j++, i++) { + p = (char *)&vport_stats + hinic_function_stats[j].offset; + data[i] = (hinic_function_stats[j].size == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + + port_stats = kzalloc(sizeof(*port_stats), GFP_KERNEL); + if (!port_stats) { + memset(&data[i], 0, + ARRAY_LEN(hinic_port_stats) * sizeof(*data)); + i += ARRAY_LEN(hinic_port_stats); + goto get_drv_stats; + } + + err = hinic_get_phy_port_stats(nic_dev, port_stats); + if (err) + netif_err(nic_dev, drv, netdev, + "Failed to get port stats from firmware\n"); + + for (j = 0; j < ARRAY_LEN(hinic_port_stats); j++, i++) { + p = (char *)port_stats + hinic_port_stats[j].offset; + data[i] = (hinic_port_stats[j].size == + sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; + } + + kfree(port_stats); + +get_drv_stats: + get_drv_queue_stats(nic_dev, data + i); +} + +static int hinic_get_sset_count(struct net_device *netdev, int sset) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int count, q_num; + + switch (sset) { + case ETH_SS_STATS: + q_num = nic_dev->num_qps; + count = ARRAY_LEN(hinic_function_stats) + + (ARRAY_LEN(hinic_tx_queue_stats) + + ARRAY_LEN(hinic_rx_queue_stats)) * q_num; + + count += ARRAY_LEN(hinic_port_stats); + + return count; + default: + return -EOPNOTSUPP; + } +} + +static void hinic_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + char *p = (char *)data; + u16 i, j; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_LEN(hinic_function_stats); i++) { + memcpy(p, hinic_function_stats[i].name, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < ARRAY_LEN(hinic_port_stats); i++) { + memcpy(p, hinic_port_stats[i].name, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < nic_dev->num_qps; i++) { + for (j = 0; j < ARRAY_LEN(hinic_tx_queue_stats); j++) { + sprintf(p, hinic_tx_queue_stats[j].name, i); + p += ETH_GSTRING_LEN; + } + } + + for (i = 0; i < nic_dev->num_qps; i++) { + for (j = 0; j < ARRAY_LEN(hinic_rx_queue_stats); j++) { + sprintf(p, hinic_rx_queue_stats[j].name, i); + p += ETH_GSTRING_LEN; + } + } + + return; + default: + return; + } +} + +static const struct ethtool_ops hinic_ethtool_ops = { + .get_link_ksettings = hinic_get_link_ksettings, + .get_drvinfo = hinic_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ringparam = hinic_get_ringparam, + .get_channels = hinic_get_channels, + .get_rxnfc = hinic_get_rxnfc, + .set_rxnfc = hinic_set_rxnfc, + .get_rxfh_key_size = hinic_get_rxfh_key_size, + .get_rxfh_indir_size = hinic_get_rxfh_indir_size, + .get_rxfh = hinic_get_rxfh, + .set_rxfh = hinic_set_rxfh, + .get_sset_count = hinic_get_sset_count, + .get_ethtool_stats = hinic_get_ethtool_stats, + .get_strings = hinic_get_strings, +}; + +void hinic_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &hinic_ethtool_ops; +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 408705687de6..6f2cf569a283 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -89,9 +89,6 @@ static int get_capability(struct hinic_hwdev *hwdev, if (nic_cap->num_qps > HINIC_Q_CTXT_MAX) nic_cap->num_qps = HINIC_Q_CTXT_MAX; - /* num_qps must be power of 2 */ - nic_cap->num_qps = BIT(fls(nic_cap->num_qps) - 1); - nic_cap->max_qps = dev_cap->max_sqs + 1; if (nic_cap->max_qps != (dev_cap->max_rqs + 1)) return -EFAULT; @@ -304,6 +301,8 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; + hw_ioctxt.lro_en = 1; + hw_ioctxt.rq_depth = ilog2(rq_depth); hw_ioctxt.rx_buf_sz_idx = HINIC_RX_BUF_SZ_IDX; @@ -872,6 +871,13 @@ void hinic_free_hwdev(struct hinic_hwdev *hwdev) hinic_free_hwif(hwdev->hwif); } +int hinic_hwdev_max_num_qps(struct hinic_hwdev *hwdev) +{ + struct hinic_cap *nic_cap = &hwdev->nic_cap; + + return nic_cap->max_qps; +} + /** * hinic_hwdev_num_qps - return the number QPs available for use * @hwdev: the NIC HW device diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 
a0a5b7434ad7..b069045de416 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -41,21 +41,73 @@ enum hinic_port_cmd { HINIC_PORT_CMD_GET_LINK_STATE = 24, + HINIC_PORT_CMD_SET_LRO = 25, + HINIC_PORT_CMD_SET_RX_CSUM = 26, + HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD = 27, + + HINIC_PORT_CMD_GET_PORT_STATISTICS = 28, + + HINIC_PORT_CMD_CLEAR_PORT_STATISTICS = 29, + + HINIC_PORT_CMD_GET_VPORT_STAT = 30, + + HINIC_PORT_CMD_CLEAN_VPORT_STAT = 31, + + HINIC_PORT_CMD_GET_RSS_TEMPLATE_INDIR_TBL = 37, + HINIC_PORT_CMD_SET_PORT_STATE = 41, + HINIC_PORT_CMD_SET_RSS_TEMPLATE_TBL = 43, + + HINIC_PORT_CMD_GET_RSS_TEMPLATE_TBL = 44, + + HINIC_PORT_CMD_SET_RSS_HASH_ENGINE = 45, + + HINIC_PORT_CMD_GET_RSS_HASH_ENGINE = 46, + + HINIC_PORT_CMD_GET_RSS_CTX_TBL = 47, + + HINIC_PORT_CMD_SET_RSS_CTX_TBL = 48, + + HINIC_PORT_CMD_RSS_TEMP_MGR = 49, + + HINIC_PORT_CMD_RSS_CFG = 66, + HINIC_PORT_CMD_FWCTXT_INIT = 69, + HINIC_PORT_CMD_GET_MGMT_VERSION = 88, + HINIC_PORT_CMD_SET_FUNC_STATE = 93, HINIC_PORT_CMD_GET_GLOBAL_QPN = 102, HINIC_PORT_CMD_SET_TSO = 112, + HINIC_PORT_CMD_SET_RQ_IQ_MAP = 115, + HINIC_PORT_CMD_GET_CAP = 170, + + HINIC_PORT_CMD_SET_LRO_TIMER = 244, }; +enum hinic_ucode_cmd { + HINIC_UCODE_CMD_MODIFY_QUEUE_CONTEXT = 0, + HINIC_UCODE_CMD_CLEAN_QUEUE_CONTEXT, + HINIC_UCODE_CMD_ARM_SQ, + HINIC_UCODE_CMD_ARM_RQ, + HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE, + HINIC_UCODE_CMD_SET_RSS_CONTEXT_TABLE, + HINIC_UCODE_CMD_GET_RSS_INDIR_TABLE, + HINIC_UCODE_CMD_GET_RSS_CONTEXT_TABLE, + HINIC_UCODE_CMD_SET_IQ_ENABLE, + HINIC_UCODE_CMD_SET_RQ_FLUSH = 10 +}; + +#define NIC_RSS_CMD_TEMP_ALLOC 0x01 +#define NIC_RSS_CMD_TEMP_FREE 0x02 + enum hinic_mgmt_msg_cmd { HINIC_MGMT_MSG_CMD_BASE = 160, @@ -97,7 +149,7 @@ struct hinic_cmd_hw_ioctxt { u8 set_cmdq_depth; u8 cmdq_depth; - u8 rsvd2; + u8 lro_en; u8 rsvd3; u8 rsvd4; u8 rsvd5; @@ -215,6 +267,8 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev); void hinic_free_hwdev(struct hinic_hwdev *hwdev); +int hinic_hwdev_max_num_qps(struct hinic_hwdev *hwdev); + int hinic_hwdev_num_qps(struct hinic_hwdev *hwdev); struct hinic_sq *hinic_hwdev_get_sq(struct hinic_hwdev *hwdev, int i); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c index 2d07bdd17432..d66f86fa3f46 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c @@ -36,6 +36,7 @@ enum io_cmd { IO_CMD_MODIFY_QUEUE_CTXT = 0, + IO_CMD_CLEAN_QUEUE_CTXT, }; static void init_db_area_idx(struct hinic_free_db_area *free_db_area) @@ -201,6 +202,59 @@ static int write_qp_ctxts(struct hinic_func_to_io *func_to_io, u16 base_qpn, write_rq_ctxts(func_to_io, base_qpn, num_qps)); } +static int hinic_clean_queue_offload_ctxt(struct hinic_func_to_io *func_to_io, + enum hinic_qp_ctxt_type ctxt_type) +{ + struct hinic_hwif *hwif = func_to_io->hwif; + struct hinic_clean_queue_ctxt *ctxt_block; + struct pci_dev *pdev = hwif->pdev; + struct hinic_cmdq_buf cmdq_buf; + u64 out_param = 0; + int err; + + err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf); + if (err) { + dev_err(&pdev->dev, "Failed to allocate cmdq buf\n"); + return err; + } + + ctxt_block = cmdq_buf.buf; + ctxt_block->cmdq_hdr.num_queues = func_to_io->max_qps; + ctxt_block->cmdq_hdr.queue_type = ctxt_type; + ctxt_block->cmdq_hdr.addr_offset = 0; + + /* TSO/LRO ctxt size: 0x0:0B; 0x1:160B; 0x2:200B; 0x3:240B */ + ctxt_block->ctxt_size = 0x3; + + hinic_cpu_to_be32(ctxt_block, 
sizeof(*ctxt_block)); + + cmdq_buf.size = sizeof(*ctxt_block); + + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + IO_CMD_CLEAN_QUEUE_CTXT, + &cmdq_buf, &out_param); + + if (err || out_param) { + dev_err(&pdev->dev, "Failed to clean offload ctxts, err: %d, out_param: 0x%llx\n", + err, out_param); + + err = -EFAULT; + } + + hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf); + + return err; +} + +static int hinic_clean_qp_offload_ctxt(struct hinic_func_to_io *func_to_io) +{ + /* clean LRO/TSO context space */ + return (hinic_clean_queue_offload_ctxt(func_to_io, + HINIC_QP_CTXT_TYPE_SQ) || + hinic_clean_queue_offload_ctxt(func_to_io, + HINIC_QP_CTXT_TYPE_RQ)); +} + /** * init_qp - Initialize a Queue Pair * @func_to_io: func to io channel that holds the IO components @@ -372,6 +426,12 @@ int hinic_io_create_qps(struct hinic_func_to_io *func_to_io, goto err_write_qp_ctxts; } + err = hinic_clean_qp_offload_ctxt(func_to_io); + if (err) { + dev_err(&pdev->dev, "Failed to clean QP contexts space\n"); + goto err_write_qp_ctxts; + } + return 0; err_write_qp_ctxts: diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h index 1856fdcc1e32..00900a6640ad 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h @@ -192,6 +192,11 @@ struct hinic_rq_ctxt { u32 wq_block_lo_pfn; }; +struct hinic_clean_queue_ctxt { + struct hinic_qp_ctxt_header cmdq_hdr; + u32 ctxt_size; +}; + struct hinic_sq_ctxt_block { struct hinic_qp_ctxt_header hdr; struct hinic_sq_ctxt sq_ctxt[HINIC_Q_CTXT_MAX]; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h index 8991c9a5ef04..f4b6d2c1061f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h @@ -210,6 +210,57 @@ #define HINIC_MSS_DEFAULT 0x3E00 #define HINIC_MSS_MIN 0x50 +#define RQ_CQE_STATUS_NUM_LRO_SHIFT 16 +#define RQ_CQE_STATUS_NUM_LRO_MASK 0xFFU + +#define RQ_CQE_STATUS_GET(val, member) (((val) >> \ + RQ_CQE_STATUS_##member##_SHIFT) & \ + RQ_CQE_STATUS_##member##_MASK) + +#define HINIC_GET_RX_NUM_LRO(status) \ + RQ_CQE_STATUS_GET(status, NUM_LRO) + +#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT 0 +#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK 0xFFFU +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT 21 +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK 0x1U + +#define RQ_CQE_OFFOLAD_TYPE_GET(val, member) (((val) >> \ + RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \ + RQ_CQE_OFFOLAD_TYPE_##member##_MASK) + +#define HINIC_GET_RX_PKT_TYPE(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE) + +#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN) + +#define RQ_CQE_SGE_VLAN_MASK 0xFFFFU +#define RQ_CQE_SGE_VLAN_SHIFT 0 + +#define RQ_CQE_SGE_GET(val, member) (((val) >> \ + RQ_CQE_SGE_##member##_SHIFT) & \ + RQ_CQE_SGE_##member##_MASK) + +#define HINIC_GET_RX_VLAN_TAG(vlan_len) \ + RQ_CQE_SGE_GET(vlan_len, VLAN) + +#define HINIC_RSS_TYPE_VALID_SHIFT 23 +#define HINIC_RSS_TYPE_TCP_IPV6_EXT_SHIFT 24 +#define HINIC_RSS_TYPE_IPV6_EXT_SHIFT 25 +#define HINIC_RSS_TYPE_TCP_IPV6_SHIFT 26 +#define HINIC_RSS_TYPE_IPV6_SHIFT 27 +#define HINIC_RSS_TYPE_TCP_IPV4_SHIFT 28 +#define HINIC_RSS_TYPE_IPV4_SHIFT 29 +#define HINIC_RSS_TYPE_UDP_IPV6_SHIFT 30 +#define HINIC_RSS_TYPE_UDP_IPV4_SHIFT 31 + +#define HINIC_RSS_TYPE_SET(val, member) \ + (((u32)(val) & 0x1) << 
HINIC_RSS_TYPE_##member##_SHIFT) + +#define HINIC_RSS_TYPE_GET(val, member) \ + (((u32)(val) >> HINIC_RSS_TYPE_##member##_SHIFT) & 0x1) + enum hinic_l4offload_type { HINIC_L4_OFF_DISABLE = 0, HINIC_TCP_OFFLOAD_ENABLE = 1, @@ -363,7 +414,7 @@ struct hinic_rq_cqe { u32 status; u32 len; - u32 rsvd2; + u32 offload_type; u32 rsvd3; u32 rsvd4; u32 rsvd5; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index b695d29d364c..2411ad270c98 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -53,6 +53,10 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)"); NETIF_MSG_IFUP | \ NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) +#define HINIC_LRO_MAX_WQE_NUM_DEFAULT 8 + +#define HINIC_LRO_RX_TIMER_DEFAULT 16 + #define VLAN_BITMAP_SIZE(nic_dev) (ALIGN(VLAN_N_VID, 8) / 8) #define work_to_rx_mode_work(work) \ @@ -63,137 +67,9 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)"); static int change_mac_addr(struct net_device *netdev, const u8 *addr); -static void set_link_speed(struct ethtool_link_ksettings *link_ksettings, - enum hinic_speed speed) -{ - switch (speed) { - case HINIC_SPEED_10MB_LINK: - link_ksettings->base.speed = SPEED_10; - break; - - case HINIC_SPEED_100MB_LINK: - link_ksettings->base.speed = SPEED_100; - break; - - case HINIC_SPEED_1000MB_LINK: - link_ksettings->base.speed = SPEED_1000; - break; - - case HINIC_SPEED_10GB_LINK: - link_ksettings->base.speed = SPEED_10000; - break; - - case HINIC_SPEED_25GB_LINK: - link_ksettings->base.speed = SPEED_25000; - break; - - case HINIC_SPEED_40GB_LINK: - link_ksettings->base.speed = SPEED_40000; - break; - - case HINIC_SPEED_100GB_LINK: - link_ksettings->base.speed = SPEED_100000; - break; - - default: - link_ksettings->base.speed = SPEED_UNKNOWN; - break; - } -} - -static int hinic_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings - *link_ksettings) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - enum hinic_port_link_state link_state; - struct hinic_port_cap port_cap; - int err; - - ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - ethtool_link_ksettings_add_link_mode(link_ksettings, supported, - Autoneg); - - link_ksettings->base.speed = SPEED_UNKNOWN; - link_ksettings->base.autoneg = AUTONEG_DISABLE; - link_ksettings->base.duplex = DUPLEX_UNKNOWN; - - err = hinic_port_get_cap(nic_dev, &port_cap); - if (err) { - netif_err(nic_dev, drv, netdev, - "Failed to get port capabilities\n"); - return err; - } - - err = hinic_port_link_state(nic_dev, &link_state); - if (err) { - netif_err(nic_dev, drv, netdev, - "Failed to get port link state\n"); - return err; - } - - if (link_state != HINIC_LINK_STATE_UP) { - netif_info(nic_dev, drv, netdev, "No link\n"); - return err; - } - - set_link_speed(link_ksettings, port_cap.speed); - - if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED)) - ethtool_link_ksettings_add_link_mode(link_ksettings, - advertising, Autoneg); - - if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE) - link_ksettings->base.autoneg = AUTONEG_ENABLE; - - link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ? 
- DUPLEX_FULL : DUPLEX_HALF; - return 0; -} - -static void hinic_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *info) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - struct hinic_hwdev *hwdev = nic_dev->hwdev; - struct hinic_hwif *hwif = hwdev->hwif; - - strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver)); - strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info)); -} - -static void hinic_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - ring->rx_max_pending = HINIC_RQ_DEPTH; - ring->tx_max_pending = HINIC_SQ_DEPTH; - ring->rx_pending = HINIC_RQ_DEPTH; - ring->tx_pending = HINIC_SQ_DEPTH; -} - -static void hinic_get_channels(struct net_device *netdev, - struct ethtool_channels *channels) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - struct hinic_hwdev *hwdev = nic_dev->hwdev; - - channels->max_rx = hwdev->nic_cap.max_qps; - channels->max_tx = hwdev->nic_cap.max_qps; - channels->max_other = 0; - channels->max_combined = 0; - channels->rx_count = hinic_hwdev_num_qps(hwdev); - channels->tx_count = hinic_hwdev_num_qps(hwdev); - channels->other_count = 0; - channels->combined_count = 0; -} - -static const struct ethtool_ops hinic_ethtool_ops = { - .get_link_ksettings = hinic_get_link_ksettings, - .get_drvinfo = hinic_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_ringparam = hinic_get_ringparam, - .get_channels = hinic_get_channels, -}; +static int set_features(struct hinic_dev *nic_dev, + netdev_features_t pre_features, + netdev_features_t features, bool force_change); static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq) { @@ -207,6 +83,9 @@ static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq) u64_stats_update_begin(&nic_rx_stats->syncp); nic_rx_stats->bytes += rx_stats.bytes; nic_rx_stats->pkts += rx_stats.pkts; + nic_rx_stats->errors += rx_stats.errors; + nic_rx_stats->csum_errors += rx_stats.csum_errors; + nic_rx_stats->other_errors += rx_stats.other_errors; u64_stats_update_end(&nic_rx_stats->syncp); hinic_rxq_clean_stats(rxq); @@ -227,6 +106,7 @@ static void update_tx_stats(struct hinic_dev *nic_dev, struct hinic_txq *txq) nic_tx_stats->tx_busy += tx_stats.tx_busy; nic_tx_stats->tx_wake += tx_stats.tx_wake; nic_tx_stats->tx_dropped += tx_stats.tx_dropped; + nic_tx_stats->big_frags_pkts += tx_stats.big_frags_pkts; u64_stats_update_end(&nic_tx_stats->syncp); hinic_txq_clean_stats(txq); @@ -363,11 +243,135 @@ static void free_rxqs(struct hinic_dev *nic_dev) nic_dev->rxqs = NULL; } +static int hinic_configure_max_qnum(struct hinic_dev *nic_dev) +{ + int err; + + err = hinic_set_max_qnum(nic_dev, nic_dev->hwdev->nic_cap.max_qps); + if (err) + return err; + + return 0; +} + +static int hinic_rss_init(struct hinic_dev *nic_dev) +{ + u8 default_rss_key[HINIC_RSS_KEY_SIZE]; + u8 tmpl_idx = nic_dev->rss_tmpl_idx; + u32 *indir_tbl; + int err, i; + + indir_tbl = kcalloc(HINIC_RSS_INDIR_SIZE, sizeof(u32), GFP_KERNEL); + if (!indir_tbl) + return -ENOMEM; + + netdev_rss_key_fill(default_rss_key, sizeof(default_rss_key)); + for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++) + indir_tbl[i] = ethtool_rxfh_indir_default(i, nic_dev->num_rss); + + err = hinic_rss_set_template_tbl(nic_dev, tmpl_idx, default_rss_key); + if (err) + goto out; + + err = hinic_rss_set_indir_tbl(nic_dev, tmpl_idx, indir_tbl); + if (err) + goto out; + + err = hinic_set_rss_type(nic_dev, tmpl_idx, nic_dev->rss_type); + if (err) + goto out; + + err = hinic_rss_set_hash_engine(nic_dev, tmpl_idx, + 
nic_dev->rss_hash_engine); + if (err) + goto out; + + err = hinic_rss_cfg(nic_dev, 1, tmpl_idx); + if (err) + goto out; + +out: + kfree(indir_tbl); + return err; +} + +static void hinic_rss_deinit(struct hinic_dev *nic_dev) +{ + hinic_rss_cfg(nic_dev, 0, nic_dev->rss_tmpl_idx); +} + +static void hinic_init_rss_parameters(struct hinic_dev *nic_dev) +{ + nic_dev->rss_hash_engine = HINIC_RSS_HASH_ENGINE_TYPE_XOR; + nic_dev->rss_type.tcp_ipv6_ext = 1; + nic_dev->rss_type.ipv6_ext = 1; + nic_dev->rss_type.tcp_ipv6 = 1; + nic_dev->rss_type.ipv6 = 1; + nic_dev->rss_type.tcp_ipv4 = 1; + nic_dev->rss_type.ipv4 = 1; + nic_dev->rss_type.udp_ipv6 = 1; + nic_dev->rss_type.udp_ipv4 = 1; +} + +static void hinic_enable_rss(struct hinic_dev *nic_dev) +{ + struct net_device *netdev = nic_dev->netdev; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + int i, node, err = 0; + u16 num_cpus = 0; + + nic_dev->max_qps = hinic_hwdev_max_num_qps(hwdev); + if (nic_dev->max_qps <= 1) { + nic_dev->flags &= ~HINIC_RSS_ENABLE; + nic_dev->rss_limit = nic_dev->max_qps; + nic_dev->num_qps = nic_dev->max_qps; + nic_dev->num_rss = nic_dev->max_qps; + + return; + } + + err = hinic_rss_template_alloc(nic_dev, &nic_dev->rss_tmpl_idx); + if (err) { + netif_err(nic_dev, drv, netdev, + "Failed to alloc tmpl_idx for rss, can't enable rss for this function\n"); + nic_dev->flags &= ~HINIC_RSS_ENABLE; + nic_dev->max_qps = 1; + nic_dev->rss_limit = nic_dev->max_qps; + nic_dev->num_qps = nic_dev->max_qps; + nic_dev->num_rss = nic_dev->max_qps; + + return; + } + + nic_dev->flags |= HINIC_RSS_ENABLE; + + for (i = 0; i < num_online_cpus(); i++) { + node = cpu_to_node(i); + if (node == dev_to_node(&pdev->dev)) + num_cpus++; + } + + if (!num_cpus) + num_cpus = num_online_cpus(); + + nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus); + + nic_dev->rss_limit = nic_dev->num_qps; + nic_dev->num_rss = nic_dev->num_qps; + + hinic_init_rss_parameters(nic_dev); + err = hinic_rss_init(nic_dev); + if (err) + netif_err(nic_dev, drv, netdev, "Failed to init rss\n"); +} + static int hinic_open(struct net_device *netdev) { struct hinic_dev *nic_dev = netdev_priv(netdev); enum hinic_port_link_state link_state; - int err, ret, num_qps; + int err, ret; if (!(nic_dev->flags & HINIC_INTF_UP)) { err = hinic_hwdev_ifup(nic_dev->hwdev); @@ -392,9 +396,17 @@ static int hinic_open(struct net_device *netdev) goto err_create_rxqs; } - num_qps = hinic_hwdev_num_qps(nic_dev->hwdev); - netif_set_real_num_tx_queues(netdev, num_qps); - netif_set_real_num_rx_queues(netdev, num_qps); + hinic_enable_rss(nic_dev); + + err = hinic_configure_max_qnum(nic_dev); + if (err) { + netif_err(nic_dev, drv, nic_dev->netdev, + "Failed to configure the maximum number of queues\n"); + goto err_port_state; + } + + netif_set_real_num_tx_queues(netdev, nic_dev->num_qps); + netif_set_real_num_rx_queues(netdev, nic_dev->num_qps); err = hinic_port_set_state(nic_dev, HINIC_PORT_ENABLE); if (err) { @@ -450,9 +462,12 @@ err_func_port_state: if (ret) netif_warn(nic_dev, drv, netdev, "Failed to revert port state\n"); - err_port_state: free_rxqs(nic_dev); + if (nic_dev->flags & HINIC_RSS_ENABLE) { + hinic_rss_deinit(nic_dev); + hinic_rss_template_free(nic_dev, nic_dev->rss_tmpl_idx); + } err_create_rxqs: free_txqs(nic_dev); @@ -496,6 +511,11 @@ static int hinic_close(struct net_device *netdev) return err; } + if (nic_dev->flags & HINIC_RSS_ENABLE) { + hinic_rss_deinit(nic_dev); + hinic_rss_template_free(nic_dev, 
nic_dev->rss_tmpl_idx); + } + free_rxqs(nic_dev); free_txqs(nic_dev); @@ -715,7 +735,6 @@ static void set_rx_mode(struct work_struct *work) { struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work); struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work); - struct netdev_hw_addr *ha; netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n"); @@ -723,9 +742,6 @@ static void set_rx_mode(struct work_struct *work) __dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); __dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); - - netdev_for_each_mc_addr(ha, nic_dev->netdev) - add_mac_addr(nic_dev->netdev, ha->addr); } static void hinic_set_rx_mode(struct net_device *netdev) @@ -776,12 +792,36 @@ static void hinic_get_stats64(struct net_device *netdev, stats->rx_bytes = nic_rx_stats->bytes; stats->rx_packets = nic_rx_stats->pkts; + stats->rx_errors = nic_rx_stats->errors; stats->tx_bytes = nic_tx_stats->bytes; stats->tx_packets = nic_tx_stats->pkts; stats->tx_errors = nic_tx_stats->tx_dropped; } +static int hinic_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + + return set_features(nic_dev, nic_dev->netdev->features, + features, false); +} + +static netdev_features_t hinic_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + + /* If Rx checksum is disabled, then LRO should also be disabled */ + if (!(features & NETIF_F_RXCSUM)) { + netif_info(nic_dev, drv, netdev, "disabling LRO as RXCSUM is off\n"); + features &= ~NETIF_F_LRO; + } + + return features; +} + static const struct net_device_ops hinic_netdev_ops = { .ndo_open = hinic_open, .ndo_stop = hinic_close, @@ -794,13 +834,16 @@ static const struct net_device_ops hinic_netdev_ops = { .ndo_start_xmit = hinic_xmit_frame, .ndo_tx_timeout = hinic_tx_timeout, .ndo_get_stats64 = hinic_get_stats64, + .ndo_fix_features = hinic_fix_features, + .ndo_set_features = hinic_set_features, }; static void netdev_features_init(struct net_device *netdev) { netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_RXCSUM; + NETIF_F_RXCSUM | NETIF_F_LRO | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; netdev->vlan_features = netdev->hw_features; @@ -873,6 +916,18 @@ static int set_features(struct hinic_dev *nic_dev, if (changed & NETIF_F_RXCSUM) err = hinic_set_rx_csum_offload(nic_dev, csum_en); + if (changed & NETIF_F_LRO) { + err = hinic_set_rx_lro_state(nic_dev, + !!(features & NETIF_F_LRO), + HINIC_LRO_RX_TIMER_DEFAULT, + HINIC_LRO_MAX_WQE_NUM_DEFAULT); + } + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + err = hinic_set_rx_vlan_offload(nic_dev, + !!(features & + NETIF_F_HW_VLAN_CTAG_RX)); + return err; } @@ -912,8 +967,8 @@ static int nic_dev_init(struct pci_dev *pdev) goto err_alloc_etherdev; } + hinic_set_ethtool_ops(netdev); netdev->netdev_ops = &hinic_netdev_ops; - netdev->ethtool_ops = &hinic_ethtool_ops; netdev->max_mtu = ETH_MAX_MTU; nic_dev = netdev_priv(netdev); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 4b3b7d39e437..1e389a004e50 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -430,3 +430,641 @@ int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en) return 0; } + +int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en) +{ + struct 
hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_vlan_cfg vlan_cfg; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size; + int err; + + if (!hwdev) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + vlan_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + vlan_cfg.vlan_rx_offload = en; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD, + &vlan_cfg, sizeof(vlan_cfg), + &vlan_cfg, &out_size); + if (err || !out_size || vlan_cfg.status) { + dev_err(&pdev->dev, + "Failed to set rx vlan offload, err: %d, status: 0x%x, out size: 0x%x\n", + err, vlan_cfg.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + struct hinic_rq_num rq_num = { 0 }; + u16 out_size = sizeof(rq_num); + int err; + + rq_num.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rq_num.num_rqs = num_rqs; + rq_num.rq_depth = ilog2(HINIC_SQ_DEPTH); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RQ_IQ_MAP, + &rq_num, sizeof(rq_num), + &rq_num, &out_size); + if (err || !out_size || rq_num.status) { + dev_err(&pdev->dev, + "Failed to set rxq number, err: %d, status: 0x%x, out size: 0x%x\n", + err, rq_num.status, out_size); + return -EINVAL; + } + + return 0; +} + +static int hinic_set_rx_lro(struct hinic_dev *nic_dev, u8 ipv4_en, u8 ipv6_en, + u8 max_wqe_num) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_lro_config lro_cfg = { 0 }; + struct pci_dev *pdev = hwif->pdev; + u16 out_size = sizeof(lro_cfg); + int err; + + lro_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + lro_cfg.lro_ipv4_en = ipv4_en; + lro_cfg.lro_ipv6_en = ipv6_en; + lro_cfg.lro_max_wqe_num = max_wqe_num; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_LRO, + &lro_cfg, sizeof(lro_cfg), + &lro_cfg, &out_size); + if (err || !out_size || lro_cfg.status) { + dev_err(&pdev->dev, + "Failed to set lro offload, err: %d, status: 0x%x, out size: 0x%x\n", + err, lro_cfg.status, out_size); + return -EINVAL; + } + + return 0; +} + +static int hinic_set_rx_lro_timer(struct hinic_dev *nic_dev, u32 timer_value) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_lro_timer lro_timer = { 0 }; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size = sizeof(lro_timer); + int err; + + lro_timer.status = 0; + lro_timer.type = 0; + lro_timer.enable = 1; + lro_timer.timer = timer_value; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_LRO_TIMER, + &lro_timer, sizeof(lro_timer), + &lro_timer, &out_size); + if (lro_timer.status == 0xFF) { + /* A status of 0xFF means the firmware does not support this command; treat it as success */ + lro_timer.status = 0; + dev_dbg(&pdev->dev, + "Setting the lro timer is not supported by the current FW version, defaulting to 1ms\n"); + } + + if (err || !out_size || lro_timer.status) { + dev_err(&pdev->dev, + "Failed to set lro timer, err: %d, status: 0x%x, out size: 0x%x\n", + err, lro_timer.status, out_size); + + return -EINVAL; + } + + return 0; +} + +int hinic_set_rx_lro_state(struct hinic_dev *nic_dev, u8 lro_en, + u32 lro_timer, u32 wqe_num) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + u8 ipv4_en; + u8 ipv6_en; + int err; + + if (!hwdev) + return -EINVAL; + + ipv4_en = lro_en ? 1 : 0; + ipv6_en = lro_en ? 
1 : 0; + + err = hinic_set_rx_lro(nic_dev, ipv4_en, ipv6_en, (u8)wqe_num); + if (err) + return err; + + err = hinic_set_rx_lro_timer(nic_dev, lro_timer); + if (err) + return err; + + return 0; +} + +int hinic_rss_set_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + const u32 *indir_table) +{ + struct hinic_rss_indirect_tbl *indir_tbl; + struct hinic_func_to_io *func_to_io; + struct hinic_cmdq_buf cmd_buf; + struct hinic_hwdev *hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u32 indir_size; + u64 out_param; + int err, i; + u32 *temp; + + hwdev = nic_dev->hwdev; + func_to_io = &hwdev->func_to_io; + hwif = hwdev->hwif; + pdev = hwif->pdev; + + err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + if (err) { + dev_err(&pdev->dev, "Failed to allocate cmdq buf\n"); + return err; + } + + cmd_buf.size = sizeof(*indir_tbl); + + indir_tbl = cmd_buf.buf; + indir_tbl->group_index = cpu_to_be32(tmpl_idx); + + for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++) { + indir_tbl->entry[i] = indir_table[i]; + + if (0x3 == (i & 0x3)) { + temp = (u32 *)&indir_tbl->entry[i - 3]; + *temp = cpu_to_be32(*temp); + } + } + + /* cfg the rss indirect table by command queue */ + indir_size = HINIC_RSS_INDIR_SIZE / 2; + indir_tbl->offset = 0; + indir_tbl->size = cpu_to_be32(indir_size); + + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE, + &cmd_buf, &out_param); + if (err || out_param != 0) { + dev_err(&pdev->dev, "Failed to set rss indir table\n"); + err = -EFAULT; + goto free_buf; + } + + indir_tbl->offset = cpu_to_be32(indir_size); + indir_tbl->size = cpu_to_be32(indir_size); + memcpy(&indir_tbl->entry[0], &indir_tbl->entry[indir_size], indir_size); + + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE, + &cmd_buf, &out_param); + if (err || out_param != 0) { + dev_err(&pdev->dev, "Failed to set rss indir table\n"); + err = -EFAULT; + } + +free_buf: + hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + + return err; +} + +int hinic_rss_get_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u32 *indir_table) +{ + struct hinic_rss_indir_table rss_cfg = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size = sizeof(rss_cfg); + int err = 0, i; + + rss_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_cfg.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, + HINIC_PORT_CMD_GET_RSS_TEMPLATE_INDIR_TBL, + &rss_cfg, sizeof(rss_cfg), &rss_cfg, + &out_size); + if (err || !out_size || rss_cfg.status) { + dev_err(&pdev->dev, "Failed to get indir table, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_cfg.status, out_size); + return -EINVAL; + } + + hinic_be32_to_cpu(rss_cfg.indir, HINIC_RSS_INDIR_SIZE); + for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++) + indir_table[i] = rss_cfg.indir[i]; + + return 0; +} + +int hinic_set_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type rss_type) +{ + struct hinic_rss_context_tbl *ctx_tbl; + struct hinic_func_to_io *func_to_io; + struct hinic_cmdq_buf cmd_buf; + struct hinic_hwdev *hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u64 out_param; + u32 ctx = 0; + int err; + + hwdev = nic_dev->hwdev; + func_to_io = &hwdev->func_to_io; + hwif = hwdev->hwif; + pdev = hwif->pdev; + + err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + if (err) { + dev_err(&pdev->dev, "Failed to allocate cmd buf\n"); + return -ENOMEM; + } + + ctx |= 
HINIC_RSS_TYPE_SET(1, VALID) | + HINIC_RSS_TYPE_SET(rss_type.ipv4, IPV4) | + HINIC_RSS_TYPE_SET(rss_type.ipv6, IPV6) | + HINIC_RSS_TYPE_SET(rss_type.ipv6_ext, IPV6_EXT) | + HINIC_RSS_TYPE_SET(rss_type.tcp_ipv4, TCP_IPV4) | + HINIC_RSS_TYPE_SET(rss_type.tcp_ipv6, TCP_IPV6) | + HINIC_RSS_TYPE_SET(rss_type.tcp_ipv6_ext, TCP_IPV6_EXT) | + HINIC_RSS_TYPE_SET(rss_type.udp_ipv4, UDP_IPV4) | + HINIC_RSS_TYPE_SET(rss_type.udp_ipv6, UDP_IPV6); + + cmd_buf.size = sizeof(struct hinic_rss_context_tbl); + + ctx_tbl = (struct hinic_rss_context_tbl *)cmd_buf.buf; + ctx_tbl->group_index = cpu_to_be32(tmpl_idx); + ctx_tbl->offset = 0; + ctx_tbl->size = sizeof(u32); + ctx_tbl->size = cpu_to_be32(ctx_tbl->size); + ctx_tbl->rsvd = 0; + ctx_tbl->ctx = cpu_to_be32(ctx); + + /* cfg the rss context table by command queue */ + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + HINIC_UCODE_CMD_SET_RSS_CONTEXT_TABLE, + &cmd_buf, &out_param); + + hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + + if (err || out_param != 0) { + dev_err(&pdev->dev, "Failed to set rss context table, err: %d\n", + err); + return -EFAULT; + } + + return 0; +} + +int hinic_get_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type *rss_type) +{ + struct hinic_rss_context_table ctx_tbl = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size = sizeof(ctx_tbl); + int err; + + if (!hwdev || !rss_type) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + ctx_tbl.func_id = HINIC_HWIF_FUNC_IDX(hwif); + ctx_tbl.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_CTX_TBL, + &ctx_tbl, sizeof(ctx_tbl), + &ctx_tbl, &out_size); + if (err || !out_size || ctx_tbl.status) { + dev_err(&pdev->dev, "Failed to get hash type, err: %d, status: 0x%x, out size: 0x%x\n", + err, ctx_tbl.status, out_size); + return -EINVAL; + } + + rss_type->ipv4 = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV4); + rss_type->ipv6 = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV6); + rss_type->ipv6_ext = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV6_EXT); + rss_type->tcp_ipv4 = HINIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV4); + rss_type->tcp_ipv6 = HINIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV6); + rss_type->tcp_ipv6_ext = HINIC_RSS_TYPE_GET(ctx_tbl.context, + TCP_IPV6_EXT); + rss_type->udp_ipv4 = HINIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV4); + rss_type->udp_ipv6 = HINIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV6); + + return 0; +} + +int hinic_rss_set_template_tbl(struct hinic_dev *nic_dev, u32 template_id, + const u8 *temp) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_rss_key rss_key = { 0 }; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + rss_key.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_key.template_id = template_id; + memcpy(rss_key.key, temp, HINIC_RSS_KEY_SIZE); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RSS_TEMPLATE_TBL, + &rss_key, sizeof(rss_key), + &rss_key, &out_size); + if (err || !out_size || rss_key.status) { + dev_err(&pdev->dev, + "Failed to set rss hash key, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_key.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_rss_get_template_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u8 *temp) +{ + struct hinic_rss_template_key temp_key = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size = sizeof(temp_key); + int err; + + 
if (!hwdev || !temp) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + temp_key.func_id = HINIC_HWIF_FUNC_IDX(hwif); + temp_key.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_TEMPLATE_TBL, + &temp_key, sizeof(temp_key), + &temp_key, &out_size); + if (err || !out_size || temp_key.status) { + dev_err(&pdev->dev, "Failed to get hash key, err: %d, status: 0x%x, out size: 0x%x\n", + err, temp_key.status, out_size); + return -EINVAL; + } + + memcpy(temp, temp_key.key, HINIC_RSS_KEY_SIZE); + + return 0; +} + +int hinic_rss_set_hash_engine(struct hinic_dev *nic_dev, u8 template_id, + u8 type) +{ + struct hinic_rss_engine_type rss_engine = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + rss_engine.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_engine.hash_engine = type; + rss_engine.template_id = template_id; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RSS_HASH_ENGINE, + &rss_engine, sizeof(rss_engine), + &rss_engine, &out_size); + if (err || !out_size || rss_engine.status) { + dev_err(&pdev->dev, + "Failed to set hash engine, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_engine.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_rss_get_hash_engine(struct hinic_dev *nic_dev, u8 tmpl_idx, u8 *type) +{ + struct hinic_rss_engine_type hash_type = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size = sizeof(hash_type); + int err; + + if (!hwdev || !type) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + hash_type.func_id = HINIC_HWIF_FUNC_IDX(hwif); + hash_type.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_HASH_ENGINE, + &hash_type, sizeof(hash_type), + &hash_type, &out_size); + if (err || !out_size || hash_type.status) { + dev_err(&pdev->dev, "Failed to get hash engine, err: %d, status: 0x%x, out size: 0x%x\n", + err, hash_type.status, out_size); + return -EINVAL; + } + + *type = hash_type.hash_engine; + return 0; +} + +int hinic_rss_cfg(struct hinic_dev *nic_dev, u8 rss_en, u8 template_id) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_rss_config rss_cfg = { 0 }; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + rss_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_cfg.rss_en = rss_en; + rss_cfg.template_id = template_id; + rss_cfg.rq_priority_number = 0; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_CFG, + &rss_cfg, sizeof(rss_cfg), + &rss_cfg, &out_size); + if (err || !out_size || rss_cfg.status) { + dev_err(&pdev->dev, + "Failed to set rss cfg, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_cfg.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_rss_template_alloc(struct hinic_dev *nic_dev, u8 *tmpl_idx) +{ + struct hinic_rss_template_mgmt template_mgmt = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif); + template_mgmt.cmd = NIC_RSS_CMD_TEMP_ALLOC; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_TEMP_MGR, + &template_mgmt, sizeof(template_mgmt), + &template_mgmt, &out_size); + if (err || !out_size || template_mgmt.status) { + dev_err(&pdev->dev, "Failed to alloc rss template, err: %d, status: 0x%x, out size: 
0x%x\n", + err, template_mgmt.status, out_size); + return -EINVAL; + } + + *tmpl_idx = template_mgmt.template_id; + + return 0; +} + +int hinic_rss_template_free(struct hinic_dev *nic_dev, u8 tmpl_idx) +{ + struct hinic_rss_template_mgmt template_mgmt = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif); + template_mgmt.template_id = tmpl_idx; + template_mgmt.cmd = NIC_RSS_CMD_TEMP_FREE; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_TEMP_MGR, + &template_mgmt, sizeof(template_mgmt), + &template_mgmt, &out_size); + if (err || !out_size || template_mgmt.status) { + dev_err(&pdev->dev, "Failed to free rss template, err: %d, status: 0x%x, out size: 0x%x\n", + err, template_mgmt.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_get_vport_stats(struct hinic_dev *nic_dev, + struct hinic_vport_stats *stats) +{ + struct hinic_cmd_vport_stats vport_stats = { 0 }; + struct hinic_port_stats_info stats_info = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + u16 out_size = sizeof(vport_stats); + struct pci_dev *pdev = hwif->pdev; + int err; + + stats_info.stats_version = HINIC_PORT_STATS_VERSION; + stats_info.func_id = HINIC_HWIF_FUNC_IDX(hwif); + stats_info.stats_size = sizeof(vport_stats); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_VPORT_STAT, + &stats_info, sizeof(stats_info), + &vport_stats, &out_size); + if (err || !out_size || vport_stats.status) { + dev_err(&pdev->dev, + "Failed to get function statistics, err: %d, status: 0x%x, out size: 0x%x\n", + err, vport_stats.status, out_size); + return -EFAULT; + } + + memcpy(stats, &vport_stats.stats, sizeof(*stats)); + return 0; +} + +int hinic_get_phy_port_stats(struct hinic_dev *nic_dev, + struct hinic_phy_port_stats *stats) +{ + struct hinic_port_stats_info stats_info = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_port_stats *port_stats; + u16 out_size = sizeof(*port_stats); + struct pci_dev *pdev = hwif->pdev; + int err; + + port_stats = kzalloc(sizeof(*port_stats), GFP_KERNEL); + if (!port_stats) + return -ENOMEM; + + stats_info.stats_version = HINIC_PORT_STATS_VERSION; + stats_info.stats_size = sizeof(*port_stats); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_PORT_STATISTICS, + &stats_info, sizeof(stats_info), + port_stats, &out_size); + if (err || !out_size || port_stats->status) { + dev_err(&pdev->dev, + "Failed to get port statistics, err: %d, status: 0x%x, out size: 0x%x\n", + err, port_stats->status, out_size); + err = -EINVAL; + goto out; + } + + memcpy(stats, &port_stats->stats, sizeof(*stats)); + +out: + kfree(port_stats); + + return err; +} + +int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_version_info up_ver = {0}; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size; + int err; + + if (!hwdev) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_MGMT_VERSION, + &up_ver, sizeof(up_ver), &up_ver, + &out_size); + if (err || !out_size || up_ver.status) { + dev_err(&pdev->dev, + "Failed to get mgmt version, err: %d, status: 0x%x, out size: 0x%x\n", + err, up_ver.status, out_size); + return -EINVAL; + } + + snprintf(mgmt_ver, HINIC_MGMT_VERSION_MAX_LEN, "%s", 
up_ver.ver); + + return 0; +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h index c562afd206be..44772fd47fc1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h @@ -13,6 +13,22 @@ #include "hinic_dev.h" +#define HINIC_RSS_KEY_SIZE 40 +#define HINIC_RSS_INDIR_SIZE 256 +#define HINIC_PORT_STATS_VERSION 0 +#define HINIC_FW_VERSION_NAME 16 +#define HINIC_COMPILE_TIME_LEN 20 +#define HINIC_MGMT_VERSION_MAX_LEN 32 + +struct hinic_version_info { + u8 status; + u8 version; + u8 rsvd[6]; + + u8 ver[HINIC_FW_VERSION_NAME]; + u8 time[HINIC_COMPILE_TIME_LEN]; +}; + enum hinic_rx_mode { HINIC_RX_MODE_UC = BIT(0), HINIC_RX_MODE_MC = BIT(1), @@ -183,6 +199,313 @@ struct hinic_checksum_offload { u16 rsvd1; u32 rx_csum_offload; }; + +struct hinic_rq_num { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1[33]; + u32 num_rqs; + u32 rq_depth; +}; + +struct hinic_lro_config { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1; + u8 lro_ipv4_en; + u8 lro_ipv6_en; + u8 lro_max_wqe_num; + u8 resv2[13]; +}; + +struct hinic_lro_timer { + u8 status; + u8 version; + u8 rsvd0[6]; + + u8 type; /* 0: set timer value, 1: get timer value */ + u8 enable; /* when set lro time, enable should be 1 */ + u16 rsvd1; + u32 timer; +}; + +struct hinic_vlan_cfg { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 vlan_rx_offload; + u8 rsvd1[5]; +}; + +struct hinic_rss_template_mgmt { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 cmd; + u8 template_id; + u8 rsvd1[4]; +}; + +struct hinic_rss_template_key { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u8 key[HINIC_RSS_KEY_SIZE]; +}; + +struct hinic_rss_context_tbl { + u32 group_index; + u32 offset; + u32 size; + u32 rsvd; + u32 ctx; +}; + +struct hinic_rss_context_table { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u32 context; +}; + +struct hinic_rss_indirect_tbl { + u32 group_index; + u32 offset; + u32 size; + u32 rsvd; + u8 entry[HINIC_RSS_INDIR_SIZE]; +}; + +struct hinic_rss_indir_table { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u8 indir[HINIC_RSS_INDIR_SIZE]; +}; + +struct hinic_rss_key { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u8 key[HINIC_RSS_KEY_SIZE]; +}; + +struct hinic_rss_engine_type { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 hash_engine; + u8 rsvd1[4]; +}; + +struct hinic_rss_config { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 rss_en; + u8 template_id; + u8 rq_priority_number; + u8 rsvd1[11]; +}; + +struct hinic_stats { + char name[ETH_GSTRING_LEN]; + u32 size; + int offset; +}; + +struct hinic_vport_stats { + u64 tx_unicast_pkts_vport; + u64 tx_unicast_bytes_vport; + u64 tx_multicast_pkts_vport; + u64 tx_multicast_bytes_vport; + u64 tx_broadcast_pkts_vport; + u64 tx_broadcast_bytes_vport; + + u64 rx_unicast_pkts_vport; + u64 rx_unicast_bytes_vport; + u64 rx_multicast_pkts_vport; + u64 rx_multicast_bytes_vport; + u64 rx_broadcast_pkts_vport; + u64 rx_broadcast_bytes_vport; + + u64 tx_discard_vport; + u64 rx_discard_vport; + u64 tx_err_vport; + u64 rx_err_vport; +}; + +struct hinic_phy_port_stats { + u64 mac_rx_total_pkt_num; + u64 mac_rx_total_oct_num; + u64 mac_rx_bad_pkt_num; + u64 mac_rx_bad_oct_num; + u64 mac_rx_good_pkt_num; 
+ u64 mac_rx_good_oct_num; + u64 mac_rx_uni_pkt_num; + u64 mac_rx_multi_pkt_num; + u64 mac_rx_broad_pkt_num; + + u64 mac_tx_total_pkt_num; + u64 mac_tx_total_oct_num; + u64 mac_tx_bad_pkt_num; + u64 mac_tx_bad_oct_num; + u64 mac_tx_good_pkt_num; + u64 mac_tx_good_oct_num; + u64 mac_tx_uni_pkt_num; + u64 mac_tx_multi_pkt_num; + u64 mac_tx_broad_pkt_num; + + u64 mac_rx_fragment_pkt_num; + u64 mac_rx_undersize_pkt_num; + u64 mac_rx_undermin_pkt_num; + u64 mac_rx_64_oct_pkt_num; + u64 mac_rx_65_127_oct_pkt_num; + u64 mac_rx_128_255_oct_pkt_num; + u64 mac_rx_256_511_oct_pkt_num; + u64 mac_rx_512_1023_oct_pkt_num; + u64 mac_rx_1024_1518_oct_pkt_num; + u64 mac_rx_1519_2047_oct_pkt_num; + u64 mac_rx_2048_4095_oct_pkt_num; + u64 mac_rx_4096_8191_oct_pkt_num; + u64 mac_rx_8192_9216_oct_pkt_num; + u64 mac_rx_9217_12287_oct_pkt_num; + u64 mac_rx_12288_16383_oct_pkt_num; + u64 mac_rx_1519_max_bad_pkt_num; + u64 mac_rx_1519_max_good_pkt_num; + u64 mac_rx_oversize_pkt_num; + u64 mac_rx_jabber_pkt_num; + + u64 mac_rx_pause_num; + u64 mac_rx_pfc_pkt_num; + u64 mac_rx_pfc_pri0_pkt_num; + u64 mac_rx_pfc_pri1_pkt_num; + u64 mac_rx_pfc_pri2_pkt_num; + u64 mac_rx_pfc_pri3_pkt_num; + u64 mac_rx_pfc_pri4_pkt_num; + u64 mac_rx_pfc_pri5_pkt_num; + u64 mac_rx_pfc_pri6_pkt_num; + u64 mac_rx_pfc_pri7_pkt_num; + u64 mac_rx_control_pkt_num; + u64 mac_rx_y1731_pkt_num; + u64 mac_rx_sym_err_pkt_num; + u64 mac_rx_fcs_err_pkt_num; + u64 mac_rx_send_app_good_pkt_num; + u64 mac_rx_send_app_bad_pkt_num; + + u64 mac_tx_fragment_pkt_num; + u64 mac_tx_undersize_pkt_num; + u64 mac_tx_undermin_pkt_num; + u64 mac_tx_64_oct_pkt_num; + u64 mac_tx_65_127_oct_pkt_num; + u64 mac_tx_128_255_oct_pkt_num; + u64 mac_tx_256_511_oct_pkt_num; + u64 mac_tx_512_1023_oct_pkt_num; + u64 mac_tx_1024_1518_oct_pkt_num; + u64 mac_tx_1519_2047_oct_pkt_num; + u64 mac_tx_2048_4095_oct_pkt_num; + u64 mac_tx_4096_8191_oct_pkt_num; + u64 mac_tx_8192_9216_oct_pkt_num; + u64 mac_tx_9217_12287_oct_pkt_num; + u64 mac_tx_12288_16383_oct_pkt_num; + u64 mac_tx_1519_max_bad_pkt_num; + u64 mac_tx_1519_max_good_pkt_num; + u64 mac_tx_oversize_pkt_num; + u64 mac_tx_jabber_pkt_num; + + u64 mac_tx_pause_num; + u64 mac_tx_pfc_pkt_num; + u64 mac_tx_pfc_pri0_pkt_num; + u64 mac_tx_pfc_pri1_pkt_num; + u64 mac_tx_pfc_pri2_pkt_num; + u64 mac_tx_pfc_pri3_pkt_num; + u64 mac_tx_pfc_pri4_pkt_num; + u64 mac_tx_pfc_pri5_pkt_num; + u64 mac_tx_pfc_pri6_pkt_num; + u64 mac_tx_pfc_pri7_pkt_num; + u64 mac_tx_control_pkt_num; + u64 mac_tx_y1731_pkt_num; + u64 mac_tx_1588_pkt_num; + u64 mac_tx_err_all_pkt_num; + u64 mac_tx_from_app_good_pkt_num; + u64 mac_tx_from_app_bad_pkt_num; + + u64 mac_rx_higig2_ext_pkt_num; + u64 mac_rx_higig2_message_pkt_num; + u64 mac_rx_higig2_error_pkt_num; + u64 mac_rx_higig2_cpu_ctrl_pkt_num; + u64 mac_rx_higig2_unicast_pkt_num; + u64 mac_rx_higig2_broadcast_pkt_num; + u64 mac_rx_higig2_l2_multicast_pkt_num; + u64 mac_rx_higig2_l3_multicast_pkt_num; + + u64 mac_tx_higig2_message_pkt_num; + u64 mac_tx_higig2_ext_pkt_num; + u64 mac_tx_higig2_cpu_ctrl_pkt_num; + u64 mac_tx_higig2_unicast_pkt_num; + u64 mac_tx_higig2_broadcast_pkt_num; + u64 mac_tx_higig2_l2_multicast_pkt_num; + u64 mac_tx_higig2_l3_multicast_pkt_num; +}; + +struct hinic_port_stats_info { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1; + u32 stats_version; + u32 stats_size; +}; + +struct hinic_port_stats { + u8 status; + u8 version; + u8 rsvd[6]; + + struct hinic_phy_port_stats stats; +}; + +struct hinic_cmd_vport_stats { + u8 status; + u8 version; + u8 rsvd0[6]; + + struct 
hinic_vport_stats stats; +}; + int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr, u16 vlan_id); @@ -211,7 +534,55 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev, int hinic_port_get_cap(struct hinic_dev *nic_dev, struct hinic_port_cap *port_cap); +int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs); + int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state); int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en); + +int hinic_set_rx_lro_state(struct hinic_dev *nic_dev, u8 lro_en, + u32 lro_timer, u32 wqe_num); + +int hinic_set_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type rss_type); + +int hinic_rss_set_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + const u32 *indir_table); + +int hinic_rss_set_template_tbl(struct hinic_dev *nic_dev, u32 template_id, + const u8 *temp); + +int hinic_rss_set_hash_engine(struct hinic_dev *nic_dev, u8 template_id, + u8 type); + +int hinic_rss_cfg(struct hinic_dev *nic_dev, u8 rss_en, u8 template_id); + +int hinic_rss_template_alloc(struct hinic_dev *nic_dev, u8 *tmpl_idx); + +int hinic_rss_template_free(struct hinic_dev *nic_dev, u8 tmpl_idx); + +void hinic_set_ethtool_ops(struct net_device *netdev); + +int hinic_get_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type *rss_type); + +int hinic_rss_get_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u32 *indir_table); + +int hinic_rss_get_template_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u8 *temp); + +int hinic_rss_get_hash_engine(struct hinic_dev *nic_dev, u8 tmpl_idx, + u8 *type); + +int hinic_get_phy_port_stats(struct hinic_dev *nic_dev, + struct hinic_phy_port_stats *stats); + +int hinic_get_vport_stats(struct hinic_dev *nic_dev, + struct hinic_vport_stats *stats); + +int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en); + +int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver); + #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 0850ea83d6c1..56ea6d692f1c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -18,6 +18,7 @@ #include <linux/dma-mapping.h> #include <linux/prefetch.h> #include <linux/cpumask.h> +#include <linux/if_vlan.h> #include <asm/barrier.h> #include "hinic_common.h" @@ -36,6 +37,15 @@ #define RX_IRQ_NO_RESEND_TIMER 0 #define HINIC_RX_BUFFER_WRITE 16 +#define HINIC_RX_IPV6_PKT 7 +#define LRO_PKT_HDR_LEN_IPV4 66 +#define LRO_PKT_HDR_LEN_IPV6 86 +#define LRO_REPLENISH_THLD 256 + +#define LRO_PKT_HDR_LEN(cqe) \ + (HINIC_GET_RX_PKT_TYPE(be32_to_cpu((cqe)->offload_type)) == \ + HINIC_RX_IPV6_PKT ? 
LRO_PKT_HDR_LEN_IPV6 : LRO_PKT_HDR_LEN_IPV4) + /** * hinic_rxq_clean_stats - Clean the statistics of specific queue * @rxq: Logical Rx Queue @@ -47,6 +57,9 @@ void hinic_rxq_clean_stats(struct hinic_rxq *rxq) u64_stats_update_begin(&rxq_stats->syncp); rxq_stats->pkts = 0; rxq_stats->bytes = 0; + rxq_stats->errors = 0; + rxq_stats->csum_errors = 0; + rxq_stats->other_errors = 0; u64_stats_update_end(&rxq_stats->syncp); } @@ -65,6 +78,10 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) start = u64_stats_fetch_begin(&rxq_stats->syncp); stats->pkts = rxq_stats->pkts; stats->bytes = rxq_stats->bytes; + stats->errors = rxq_stats->csum_errors + + rxq_stats->other_errors; + stats->csum_errors = rxq_stats->csum_errors; + stats->other_errors = rxq_stats->other_errors; } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); u64_stats_update_end(&stats->syncp); } @@ -81,27 +98,25 @@ static void rxq_stats_init(struct hinic_rxq *rxq) hinic_rxq_clean_stats(rxq); } -static void rx_csum(struct hinic_rxq *rxq, u16 cons_idx, +static void rx_csum(struct hinic_rxq *rxq, u32 status, struct sk_buff *skb) { struct net_device *netdev = rxq->netdev; - struct hinic_rq_cqe *cqe; - struct hinic_rq *rq; u32 csum_err; - u32 status; - rq = rxq->rq; - cqe = rq->cqe[cons_idx]; - status = be32_to_cpu(cqe->status); csum_err = HINIC_RQ_CQE_STATUS_GET(status, CSUM_ERR); if (!(netdev->features & NETIF_F_RXCSUM)) return; - if (!csum_err) + if (!csum_err) { skb->ip_summed = CHECKSUM_UNNECESSARY; - else + } else { + if (!(csum_err & (HINIC_RX_CSUM_HW_CHECK_NONE | + HINIC_RX_CSUM_IPSU_OTHER_ERR))) + rxq->rxq_stats.csum_errors++; skb->ip_summed = CHECKSUM_NONE; + } } /** * rx_alloc_skb - allocate skb and map it to dma address @@ -311,13 +326,21 @@ static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb, static int rxq_recv(struct hinic_rxq *rxq, int budget) { struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq); + struct net_device *netdev = rxq->netdev; u64 pkt_len = 0, rx_bytes = 0; + struct hinic_rq *rq = rxq->rq; struct hinic_rq_wqe *rq_wqe; unsigned int free_wqebbs; + struct hinic_rq_cqe *cqe; int num_wqes, pkts = 0; struct hinic_sge sge; + unsigned int status; struct sk_buff *skb; - u16 ci; + u32 offload_type; + u16 ci, num_lro; + u16 num_wqe = 0; + u32 vlan_len; + u16 vid; while (pkts < budget) { num_wqes = 0; @@ -327,11 +350,13 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) if (!rq_wqe) break; + cqe = rq->cqe[ci]; + status = be32_to_cpu(cqe->status); hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge); rx_unmap_skb(rxq, hinic_sge_to_dma(&sge)); - rx_csum(rxq, ci, skb); + rx_csum(rxq, status, skb); prefetch(skb->data); @@ -345,9 +370,17 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) HINIC_RX_BUF_SZ, ci); } - hinic_rq_put_wqe(rxq->rq, ci, + hinic_rq_put_wqe(rq, ci, (num_wqes + 1) * HINIC_RQ_WQE_SIZE); + offload_type = be32_to_cpu(cqe->offload_type); + vlan_len = be32_to_cpu(cqe->len); + if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)) { + vid = HINIC_GET_RX_VLAN_TAG(vlan_len); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + skb_record_rx_queue(skb, qp->q_id); skb->protocol = eth_type_trans(skb, rxq->netdev); @@ -355,6 +388,21 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) pkts++; rx_bytes += pkt_len; + + num_lro = HINIC_GET_RX_NUM_LRO(status); + if (num_lro) { + rx_bytes += ((num_lro - 1) * + LRO_PKT_HDR_LEN(cqe)); + + num_wqe += + (u16)(pkt_len >> rxq->rx_buff_shift) + + 
((pkt_len & (rxq->buf_len - 1)) ? 1 : 0); + } + + cqe->status = 0; + + if (num_wqe >= LRO_REPLENISH_THLD) + break; } free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq); @@ -469,20 +517,20 @@ int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq, struct net_device *netdev) { struct hinic_qp *qp = container_of(rq, struct hinic_qp, rq); - int err, pkts, irqname_len; + int err, pkts; rxq->netdev = netdev; rxq->rq = rq; + rxq->buf_len = HINIC_RX_BUF_SZ; + rxq->rx_buff_shift = ilog2(HINIC_RX_BUF_SZ); rxq_stats_init(rxq); - irqname_len = snprintf(NULL, 0, "hinic_rxq%d", qp->q_id) + 1; - rxq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL); + rxq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL, + "hinic_rxq%d", qp->q_id); if (!rxq->irq_name) return -ENOMEM; - sprintf(rxq->irq_name, "hinic_rxq%d", qp->q_id); - pkts = rx_alloc_pkts(rxq); if (!pkts) { err = -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.h b/drivers/net/ethernet/huawei/hinic/hinic_rx.h index bc797498a87f..507dcbae9085 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.h @@ -21,7 +21,10 @@ struct hinic_rxq_stats { u64 pkts; u64 bytes; - + u64 errors; + u64 csum_errors; + u64 other_errors; + u64 alloc_skb_err; struct u64_stats_sync syncp; }; @@ -32,6 +35,8 @@ struct hinic_rxq { struct hinic_rxq_stats rxq_stats; char *irq_name; + u16 buf_len; + u32 rx_buff_shift; struct napi_struct napi; }; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index b9fd8d720349..9c78251f9c39 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -83,6 +83,7 @@ void hinic_txq_clean_stats(struct hinic_txq *txq) txq_stats->tx_busy = 0; txq_stats->tx_wake = 0; txq_stats->tx_dropped = 0; + txq_stats->big_frags_pkts = 0; u64_stats_update_end(&txq_stats->syncp); } @@ -104,6 +105,7 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) stats->tx_busy = txq_stats->tx_busy; stats->tx_wake = txq_stats->tx_wake; stats->tx_dropped = txq_stats->tx_dropped; + stats->big_frags_pkts = txq_stats->big_frags_pkts; } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); u64_stats_update_end(&stats->syncp); } @@ -405,10 +407,20 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info, return 1; } +static void offload_vlan(struct hinic_sq_task *task, u32 *queue_info, + u16 vlan_tag, u16 vlan_pri) +{ + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) | + HINIC_SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD); + + *queue_info |= HINIC_SQ_CTRL_SET(vlan_pri, QUEUE_INFO_PRI); +} + static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task, u32 *queue_info) { enum hinic_offload_type offload = 0; + u16 vlan_tag; int enabled; enabled = offload_tso(task, queue_info, skb); @@ -422,6 +434,13 @@ static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task, return -EPROTONOSUPPORT; } + if (unlikely(skb_vlan_tag_present(skb))) { + vlan_tag = skb_vlan_tag_get(skb); + offload_vlan(task, queue_info, vlan_tag, + vlan_tag >> VLAN_PRIO_SHIFT); + offload |= TX_OFFLOAD_VLAN; + } + if (offload) hinic_task_set_l2hdr(task, skb_network_offset(skb)); @@ -464,6 +483,12 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } nr_sges = skb_shinfo(skb)->nr_frags + 1; + if (nr_sges > 17) { + u64_stats_update_begin(&txq->txq_stats.syncp); + txq->txq_stats.big_frags_pkts++; + 
u64_stats_update_end(&txq->txq_stats.syncp); + } + if (nr_sges > txq->max_sges) { netdev_err(netdev, "Too many Tx sges\n"); goto skb_error; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.h b/drivers/net/ethernet/huawei/hinic/hinic_tx.h index ca5f537fc383..f158b7db7fb8 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.h @@ -21,6 +21,7 @@ struct hinic_txq_stats { u64 tx_busy; u64 tx_wake; u64 tx_dropped; + u64 big_frags_pkts; struct u64_stats_sync syncp; }; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 551de8c2fef2..f703fa58458e 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3019,7 +3019,7 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); tx_ring->next_to_use = i; } @@ -4540,7 +4540,7 @@ e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); writel(i, adapter->hw.hw_addr + rx_ring->rdt); } } @@ -4655,7 +4655,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); writel(i, hw->hw_addr + rx_ring->rdt); } } diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index f86d55657959..4b103cca8a39 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -680,7 +680,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ew32(TCTL, E1000_TCTL_PSP); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); ctrl = er32(CTRL); diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index b9309302c29e..2c1bab377b2a 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -959,7 +959,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ew32(TCTL, tctl); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Must acquire the MDIO ownership before MAC reset. * Ownership defaults to firmware after a reset. 
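The hinic_init_rxq() hunk earlier in this series collapses a three-step idiom, a snprintf(NULL, 0, ...) length probe, a devm_kzalloc(), and a trailing sprintf(), into one devm_kasprintf() call that sizes, allocates, and formats in a single step and ties the buffer's lifetime to the device. A minimal before/after sketch of that conversion; the function names here are illustrative, only the devm_* calls and the format string come from the driver:

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/slab.h>

/* Before: probe the length, allocate, then format. Three steps, and
 * the size accounting is easy to get wrong.
 */
static char *irq_name_old(struct device *dev, int qid)
{
        int len = snprintf(NULL, 0, "hinic_rxq%d", qid) + 1;
        char *name = devm_kzalloc(dev, len, GFP_KERNEL);

        if (name)
                sprintf(name, "hinic_rxq%d", qid);
        return name;
}

/* After: one call; returns NULL on allocation failure, and the buffer
 * is freed automatically when the device is unbound.
 */
static char *irq_name_new(struct device *dev, int qid)
{
        return devm_kasprintf(dev, GFP_KERNEL, "hinic_rxq%d", qid);
}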
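The hinic rxq_recv() changes also add receive VLAN offload: when the hardware has stripped the tag into the completion descriptor, the driver hands it to the stack with __vlan_hwaccel_put_tag(), gated on NETIF_F_HW_VLAN_CTAG_RX. A generic sketch of that receive-side pattern, assuming plain parameters in place of hinic's CQE field accessors:

#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/netdevice.h>

static void my_rx_finish(struct net_device *netdev, struct sk_buff *skb,
                         bool vlan_stripped, u16 vlan_tci)
{
        /* Hand the stripped tag to the stack only while the feature is
         * enabled; otherwise the tag must remain in the frame itself.
         */
        if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && vlan_stripped)
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);

        skb->protocol = eth_type_trans(skb, netdev);
        netif_receive_skb(skb);
}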
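The e1000 hunks above relax wmb() to dma_wmb() ahead of the tail-register writes. Both barriers order the CPU's descriptor stores before the doorbell that lets the NIC fetch them, but dma_wmb() only orders stores to coherent DMA memory and is cheaper than a full wmb() on weakly ordered architectures such as the IA-64 case the driver comments cite. A hedged sketch of the publish pattern, with the ring layout and bit values invented for illustration, not taken from e1000:

#include <linux/io.h>
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/byteorder.h>

struct my_desc {
        __le64 buf_addr;
        __le32 cmd;
        __le32 status;
};

struct my_ring {
        struct my_desc *desc;   /* coherent DMA memory */
        void __iomem *tail;     /* device tail/doorbell register */
        u16 next_to_use;
        u16 count;
};

static void my_ring_publish(struct my_ring *ring)
{
        struct my_desc *d = &ring->desc[ring->next_to_use];

        /* CPU fills the descriptor in coherent memory. */
        d->cmd |= cpu_to_le32(0x01000000);      /* e.g. end-of-packet bit */

        /* Make the descriptor visible to the device before the tail
         * write hands it over. dma_wmb() suffices for coherent memory;
         * a full wmb() is only needed against non-coherent stores.
         */
        dma_wmb();

        ring->next_to_use = (ring->next_to_use + 1) % ring->count;
        writel(ring->next_to_use, ring->tail);
}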
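The long run of e1000e hunks that follows narrows every usleep_range(min, max) window, for example (10000, 20000) becomes (10000, 11000). usleep_range() arms an hrtimer, and the span is slack the scheduler may use to coalesce wakeups, so a 2x window lets each pass of a polling loop take up to twice its nominal period. A sketch of the bounded-poll idiom these call sites share; the register and bit are plain parameters rather than e1000e symbols:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>

/* Poll a ready bit roughly every 10 ms, for at most 30 tries. */
static int my_wait_for_bit(void __iomem *status_reg, u32 ready_bit)
{
        int i;

        for (i = 0; i < 30; i++) {
                if (readl(status_reg) & ready_bit)
                        return 0;
                /* At most ~1 ms of scheduler slack per iteration, so
                 * the worst case stays near 30 * 11 ms instead of the
                 * 30 * 20 ms a (10000, 20000) range would allow.
                 */
                usleep_range(10000, 11000);
        }
        return -ETIMEDOUT;
}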
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index fd550dee4982..63c3c79380a1 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -222,6 +222,9 @@ #define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master Req status */ +/* PCIm function state */ +#define E1000_STATUS_PCIM_STATE 0x40000000 + #define HALF_DUPLEX 1 #define FULL_DUPLEX 2 diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index be13227f1697..34cd67951aec 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -186,12 +186,13 @@ struct e1000_phy_regs { /* board specific private data structure */ struct e1000_adapter { - struct timer_list watchdog_timer; struct timer_list phy_info_timer; struct timer_list blink_timer; struct work_struct reset_task; - struct work_struct watchdog_task; + struct delayed_work watchdog_task; + + struct workqueue_struct *e1000_workqueue; const struct e1000_info *ei; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 02ebf208f48b..08342698386d 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -1014,7 +1014,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Test each interrupt */ for (i = 0; i < 10; i++) { @@ -1046,7 +1046,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMC, mask); ew32(ICS, mask); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->test_icr & mask) { *data = 3; @@ -1064,7 +1064,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMS, mask); ew32(ICS, mask); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (!(adapter->test_icr & mask)) { *data = 4; @@ -1082,7 +1082,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMC, ~mask & 0x00007FFF); ew32(ICS, ~mask & 0x00007FFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->test_icr) { *data = 5; @@ -1094,7 +1094,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Unhook test interrupt handler */ free_irq(irq, netdev); @@ -1470,7 +1470,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) */ ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); return 0; } @@ -1584,7 +1584,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) hw->phy.media_type == e1000_media_type_internal_serdes) { ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); break; } /* Fall Through */ diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index cdae0efde8e6..395b05701480 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -271,7 +271,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) u16 count = 20; 
do { - usleep_range(5000, 10000); + usleep_range(5000, 6000); } while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--); msleep(30); @@ -405,7 +405,7 @@ out: /* Ungate automatic PHY configuration on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(fwsm & E1000_ICH_FWSM_FW_VALID)) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_gate_hw_phy_config_ich8lan(hw, false); } @@ -531,7 +531,7 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) phy->id = 0; while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) && (i++ < 100)) { - usleep_range(1000, 2000); + usleep_range(1000, 1100); ret_val = e1000e_get_phy_id(hw); if (ret_val) return ret_val; @@ -1244,7 +1244,7 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) goto out; } - usleep_range(10000, 20000); + usleep_range(10000, 11000); } e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); @@ -1999,7 +1999,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && (i++ < 30)) - usleep_range(10000, 20000); + usleep_range(10000, 11000); return blocked ? E1000_BLK_PHY_RESET : 0; } @@ -2818,7 +2818,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) return 0; /* Allow time for h/w to get to quiescent state after reset */ - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Perform any necessary post-reset workarounds */ switch (hw->mac.type) { @@ -2854,7 +2854,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) if (hw->mac.type == e1000_pch2lan) { /* Ungate automatic PHY configuration on non-managed 82579 */ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_gate_hw_phy_config_ich8lan(hw, false); } @@ -3875,7 +3875,7 @@ release: */ if (!ret_val) { nvm->ops.reload(hw); - usleep_range(10000, 20000); + usleep_range(10000, 11000); } out: @@ -4026,7 +4026,7 @@ release: */ if (!ret_val) { nvm->ops.reload(hw); - usleep_range(10000, 20000); + usleep_range(10000, 11000); } out: @@ -4650,7 +4650,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ew32(TCTL, E1000_TCTL_PSP); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Workaround for ICH8 bit corruption issue in FIFO memory */ if (hw->mac.type == e1000_ich8lan) { diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index 4abd55d646c5..e531976f8a67 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -797,7 +797,7 @@ static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) * milliseconds even if the other end is doing it in SW). 
*/ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); status = er32(STATUS); if (status & E1000_STATUS_LU) break; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 0e09bede42a2..e4baa13b3cda 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1780,7 +1780,8 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } /* Reset on uncorrectable ECC error */ @@ -1860,7 +1861,8 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data) } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } /* Reset on uncorrectable ECC error */ @@ -1905,7 +1907,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -3208,7 +3211,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ew32(RCTL, rctl & ~E1000_RCTL_EN); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->flags2 & FLAG2_DMA_BURST) { /* set the writeback threshold (only takes effect if the RDTR @@ -4046,12 +4049,12 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: - fc->refresh_time = 0x0400; + fc->refresh_time = 0xFFFF; + fc->pause_time = 0xFFFF; if (adapter->netdev->mtu <= ETH_DATA_LEN) { fc->high_water = 0x05C20; fc->low_water = 0x05048; - fc->pause_time = 0x0650; break; } @@ -4208,7 +4211,7 @@ void e1000e_up(struct e1000_adapter *adapter) e1000_configure_msix(adapter); e1000_irq_enable(adapter); - netif_start_queue(adapter->netdev); + /* Tx queue started by watchdog timer when link is up */ e1000e_trigger_lsc(adapter); } @@ -4272,13 +4275,12 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset) /* flush both disables and wait for them to finish */ e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_irq_disable(adapter); napi_synchronize(&adapter->napi); - del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); spin_lock(&adapter->stats64_lock); @@ -4310,7 +4312,7 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter) { might_sleep(); while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) - usleep_range(1000, 2000); + usleep_range(1000, 1100); e1000e_down(adapter, true); e1000e_up(adapter); clear_bit(__E1000_RESETTING, &adapter->state); @@ -4606,6 +4608,7 @@ int e1000e_open(struct net_device *netdev) pm_runtime_get_sync(&pdev->dev); netif_carrier_off(netdev); + netif_stop_queue(netdev); /* allocate transmit descriptors */ err = e1000e_setup_tx_resources(adapter->tx_ring); @@ -4666,7 +4669,6 @@ int e1000e_open(struct net_device *netdev) e1000_irq_enable(adapter); adapter->tx_hang_recheck = false; - 
netif_start_queue(netdev); hw->mac.get_link_status = true; pm_runtime_put(&pdev->dev); @@ -4707,7 +4709,7 @@ int e1000e_close(struct net_device *netdev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -5150,31 +5152,18 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) } } -/** - * e1000_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long - **/ -static void e1000_watchdog(struct timer_list *t) -{ - struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer); - - /* Do the rest outside of interrupt context */ - schedule_work(&adapter->watchdog_task); - - /* TODO: make this use queue_delayed_work() */ -} - static void e1000_watchdog_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, - watchdog_task); + watchdog_task.work); struct net_device *netdev = adapter->netdev; struct e1000_mac_info *mac = &adapter->hw.mac; struct e1000_phy_info *phy = &adapter->hw.phy; struct e1000_ring *tx_ring = adapter->tx_ring; + u32 dmoff_exit_timeout = 100, tries = 0; struct e1000_hw *hw = &adapter->hw; - u32 link, tctl; + u32 link, tctl, pcim_state; if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -5199,6 +5188,21 @@ static void e1000_watchdog_task(struct work_struct *work) /* Cancel scheduled suspend requests. */ pm_runtime_resume(netdev->dev.parent); + /* Checking if MAC is in DMoff state*/ + pcim_state = er32(STATUS); + while (pcim_state & E1000_STATUS_PCIM_STATE) { + if (tries++ == dmoff_exit_timeout) { + e_dbg("Error in exiting dmoff\n"); + break; + } + usleep_range(10000, 20000); + pcim_state = er32(STATUS); + + /* Checking if MAC exited DMoff state */ + if (!(pcim_state & E1000_STATUS_PCIM_STATE)) + e1000_phy_hw_reset(&adapter->hw); + } + /* update snapshot of PHY registers on LSC */ e1000_phy_read_status(adapter); mac->ops.get_link_up_info(&adapter->hw, @@ -5288,6 +5292,7 @@ static void e1000_watchdog_task(struct work_struct *work) if (phy->ops.cfg_on_link_up) phy->ops.cfg_on_link_up(hw); + netif_wake_queue(netdev); netif_carrier_on(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -5301,6 +5306,7 @@ static void e1000_watchdog_task(struct work_struct *work) /* Link status message must follow this format */ pr_info("%s NIC Link is Down\n", adapter->netdev->name); netif_carrier_off(netdev); + netif_stop_queue(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -5308,13 +5314,8 @@ static void e1000_watchdog_task(struct work_struct *work) /* 8000ES2LAN requires a Rx packet buffer work-around * on link down event; reset the controller to flush * the Rx packet buffer. - * - * If the link is lost the controller stops DMA, but - * if there is queued Tx work it cannot be done. So - * reset the controller to flush the Tx packet buffers. */ - if ((adapter->flags & FLAG_RX_NEEDS_RESTART) || - e1000_desc_unused(tx_ring) + 1 < tx_ring->count) + if (adapter->flags & FLAG_RX_NEEDS_RESTART) adapter->flags |= FLAG_RESTART_NOW; else pm_schedule_suspend(netdev->dev.parent, @@ -5337,6 +5338,14 @@ link_up: adapter->gotc_old = adapter->stats.gotc; spin_unlock(&adapter->stats64_lock); + /* If the link is lost the controller stops DMA, but + * if there is queued Tx work it cannot be done. 
So + * reset the controller to flush the Tx packet buffers. + */ + if (!netif_carrier_ok(netdev) && + (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) + adapter->flags |= FLAG_RESTART_NOW; + /* If reset is necessary, do it outside of interrupt context. */ if (adapter->flags & FLAG_RESTART_NOW) { schedule_work(&adapter->reset_task); @@ -5395,8 +5404,9 @@ link_up: /* Reset the timer */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 2 * HZ)); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, + round_jiffies(2 * HZ)); } #define E1000_TX_FLAGS_CSUM 0x00000001 @@ -6016,7 +6026,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) } while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) - usleep_range(1000, 2000); + usleep_range(1000, 1100); /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */ adapter->max_frame_size = max_frame; e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -6296,7 +6306,7 @@ static int e1000e_pm_freeze(struct device *dev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -6711,7 +6721,7 @@ static int e1000e_pm_runtime_suspend(struct device *dev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -7251,11 +7261,21 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0); + adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + e1000e_driver_name); + + if (!adapter->e1000_workqueue) { + err = -ENOMEM; + goto err_workqueue; + } + + INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task); + queue_delayed_work(adapter->e1000_workqueue, &adapter->watchdog_task, + 0); + timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0); INIT_WORK(&adapter->reset_task, e1000_reset_task); - INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround); INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task); INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang); @@ -7349,6 +7369,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_register: + flush_workqueue(adapter->e1000_workqueue); + destroy_workqueue(adapter->e1000_workqueue); +err_workqueue: if (!(adapter->flags & FLAG_HAS_AMT)) e1000e_release_hw_control(adapter); err_eeprom: @@ -7395,15 +7418,17 @@ static void e1000_remove(struct pci_dev *pdev) */ if (!down) set_bit(__E1000_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); - cancel_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->downshift_task); cancel_work_sync(&adapter->update_phy_task); cancel_work_sync(&adapter->print_hang_task); + cancel_delayed_work(&adapter->watchdog_task); + flush_workqueue(adapter->e1000_workqueue); + destroy_workqueue(adapter->e1000_workqueue); + if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) { cancel_work_sync(&adapter->tx_hwtstamp_work); if (adapter->tx_hwtstamp_skb) { diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c 
b/drivers/net/ethernet/intel/e1000e/nvm.c index 937f9af22d26..e609f4df86f4 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -392,7 +392,7 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) break; } } - usleep_range(10000, 20000); + usleep_range(10000, 11000); nvm->ops.release(hw); } diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 7ce42040b851..84bd06901014 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -27,6 +27,7 @@ #include <net/ip6_checksum.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> +#include <linux/if_macvlan.h> #include <linux/if_bridge.h> #include <linux/clocksource.h> #include <linux/net_tstamp.h> @@ -295,8 +296,6 @@ struct i40e_cloud_filter { u8 tunnel_type; }; -#define I40E_ETH_P_LLDP 0x88cc - #define I40E_DCB_PRIO_TYPE_STRICT 0 #define I40E_DCB_PRIO_TYPE_ETS 1 #define I40E_DCB_STRICT_PRIO_CREDITS 127 @@ -414,6 +413,11 @@ struct i40e_flex_pit { u8 pit_index; }; +struct i40e_fwd_adapter { + struct net_device *netdev; + int bit_no; +}; + struct i40e_channel { struct list_head list; bool initialized; @@ -428,11 +432,25 @@ struct i40e_channel { struct i40e_aqc_vsi_properties_data info; u64 max_tx_rate; + struct i40e_fwd_adapter *fwd; /* track this channel belongs to which VSI */ struct i40e_vsi *parent_vsi; }; +static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch) +{ + return !!ch->fwd; +} + +static inline u8 *i40e_channel_mac(struct i40e_channel *ch) +{ + if (i40e_is_channel_macvlan(ch)) + return ch->fwd->netdev->dev_addr; + else + return NULL; +} + /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; @@ -777,7 +795,8 @@ struct i40e_vsi { u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */ u16 req_queue_pairs; /* User requested queue pairs */ u16 num_queue_pairs; /* Used tx and rx pairs */ - u16 num_desc; + u16 num_tx_desc; + u16 num_rx_desc; enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ s16 vf_id; /* Virtual function ID for SRIOV VSIs */ @@ -814,6 +833,13 @@ struct i40e_vsi { struct list_head ch_list; u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS]; + /* macvlan fields */ +#define I40E_MAX_MACVLANS 128 /* Max HW vectors - 1 on FVL */ +#define I40E_MIN_MACVLAN_VECTORS 2 /* Min vectors to enable macvlans */ + DECLARE_BITMAP(fwd_bitmask, I40E_MAX_MACVLANS); + struct list_head macvlan_list; + int macvlan_cnt; + void *priv; /* client driver data reference. 
*/ /* VSI specific handlers */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 243dcd4bec19..814acbe79ffd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -675,7 +675,7 @@ static u16 i40e_clean_asq(struct i40e_hw *hw) desc = I40E_ADMINQ_DESC(*asq, ntc); details = I40E_ADMINQ_DETAILS(*asq, ntc); while (rd32(hw, hw->aq.asq.head) != ntc) { - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head)); if (details->callback) { @@ -835,7 +835,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, } /* bump the tail */ - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n"); + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQTX: desc and buffer:\n"); i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring, buff, buff_size); (hw->aq.asq.next_to_use)++; @@ -886,7 +886,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval; } - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQTX: desc and buffer writeback:\n"); i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, buff, buff_size); @@ -995,7 +995,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, memcpy(e->msg_buf, hw->aq.arq.r.arq_bi[desc_idx].va, e->msg_len); - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: desc and buffer:\n"); + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQRX: desc and buffer:\n"); i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf, hw->aq.arq_buf_size); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index ecb1adaa54ec..906cf68d3453 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -281,47 +281,49 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, void *buffer, u16 buf_len) { struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; + u32 effective_mask = hw->debug_mask & mask; + char prefix[27]; u16 len; u8 *buf = (u8 *)buffer; - if ((!(mask & hw->debug_mask)) || (desc == NULL)) + if (!effective_mask || !desc) return; len = le16_to_cpu(aq_desc->datalen); - i40e_debug(hw, mask, + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", le16_to_cpu(aq_desc->opcode), le16_to_cpu(aq_desc->flags), le16_to_cpu(aq_desc->datalen), le16_to_cpu(aq_desc->retval)); - i40e_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n", + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, + "\tcookie (h,l) 0x%08X 0x%08X\n", le32_to_cpu(aq_desc->cookie_high), le32_to_cpu(aq_desc->cookie_low)); - i40e_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n", + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, + "\tparam (0,1) 0x%08X 0x%08X\n", le32_to_cpu(aq_desc->params.internal.param0), le32_to_cpu(aq_desc->params.internal.param1)); - i40e_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n", + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, + "\taddr (h,l) 0x%08X 0x%08X\n", le32_to_cpu(aq_desc->params.external.addr_high), le32_to_cpu(aq_desc->params.external.addr_low)); - if ((buffer != NULL) && (aq_desc->datalen != 0)) { + if (buffer && buf_len != 0 && len != 0 && + (effective_mask & I40E_DEBUG_AQ_DESC_BUFFER)) { i40e_debug(hw, mask, "AQ CMD Buffer:\n"); if (buf_len < len) len = buf_len; - /* write the full 16-byte chunks */ - if 
(hw->debug_mask & mask) { - char prefix[27]; - - snprintf(prefix, sizeof(prefix), - "i40e %02x:%02x.%x: \t0x", - hw->bus.bus_id, - hw->bus.device, - hw->bus.func); - - print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, - 16, 1, buf, len, false); - } + + snprintf(prefix, sizeof(prefix), + "i40e %02x:%02x.%x: \t0x", + hw->bus.bus_id, + hw->bus.device, + hw->bus.func); + + print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); } } @@ -1859,8 +1861,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; - if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= 7) { + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { __le32 tmp; memcpy(&tmp, resp->link_type, sizeof(tmp)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 7ea4f09229e4..55d20acfcf70 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -333,8 +333,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " seid = %d, id = %d, uplink_seid = %d\n", vsi->seid, vsi->id, vsi->uplink_seid); dev_info(&pf->pdev->dev, - " base_queue = %d, num_queue_pairs = %d, num_desc = %d\n", - vsi->base_queue, vsi->num_queue_pairs, vsi->num_desc); + " base_queue = %d, num_queue_pairs = %d, num_tx_desc = %d, num_rx_desc = %d\n", + vsi->base_queue, vsi->num_queue_pairs, vsi->num_tx_desc, + vsi->num_rx_desc); dev_info(&pf->pdev->dev, " type = %i\n", vsi->type); if (vsi->type == I40E_VSI_SRIOV) dev_info(&pf->pdev->dev, " VF ID = %i\n", vsi->vf_id); @@ -1330,7 +1331,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, } ret = i40e_aq_add_rem_control_packet_filter(&pf->hw, pf->hw.mac.addr, - I40E_ETH_P_LLDP, 0, + ETH_P_LLDP, 0, pf->vsi[pf->lan_vsi]->seid, 0, true, NULL, NULL); if (ret) { @@ -1348,7 +1349,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, ret = i40e_aq_add_rem_control_packet_filter(&pf->hw, pf->hw.mac.addr, - I40E_ETH_P_LLDP, 0, + ETH_P_LLDP, 0, pf->vsi[pf->lan_vsi]->seid, 0, false, NULL, NULL); if (ret) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 7545b21bee3c..527eb52c5401 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1982,6 +1982,8 @@ static int i40e_set_ringparam(struct net_device *netdev, if (i40e_enabled_xdp_vsi(vsi)) vsi->xdp_rings[i]->count = new_tx_count; } + vsi->num_tx_desc = new_tx_count; + vsi->num_rx_desc = new_rx_count; goto done; } @@ -2118,6 +2120,8 @@ rx_unwind: rx_rings = NULL; } + vsi->num_tx_desc = new_tx_count; + vsi->num_rx_desc = new_rx_count; i40e_up(vsi); free_tx: @@ -4852,9 +4856,12 @@ static u32 i40e_get_priv_flags(struct net_device *dev) static int i40e_set_priv_flags(struct net_device *dev, u32 flags) { struct i40e_netdev_priv *np = netdev_priv(dev); + u64 orig_flags, new_flags, changed_flags; + enum i40e_admin_queue_err adq_err; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u64 orig_flags, new_flags, changed_flags; + bool is_reset_needed; + i40e_status status; u32 i, j; orig_flags = READ_ONCE(pf->flags); @@ -4898,6 +4905,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) flags_complete: changed_flags = orig_flags ^ new_flags; + is_reset_needed = !!(changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | + 
I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED | + I40E_FLAG_DISABLE_FW_LLDP)); + /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are supported and safe. */ @@ -4932,13 +4943,6 @@ flags_complete: return -EOPNOTSUPP; } - /* Now that we've checked to ensure that the new flags are valid, load - * them into place. Since we only modify flags either (a) during - * initialization or (b) while holding the RTNL lock, we don't need - * anything fancy here. - */ - pf->flags = new_flags; - /* Process any additional changes needed as a result of flag changes. * The changed_flags value reflects the list of bits that were * changed in the code above. @@ -4946,7 +4950,7 @@ flags_complete: /* Flush current ATR settings if ATR was disabled */ if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) && - !(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) { + !(new_flags & I40E_FLAG_FD_ATR_ENABLED)) { set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); } @@ -4955,7 +4959,7 @@ flags_complete: u16 sw_flags = 0, valid_flags = 0; int ret; - if (!(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) + if (!(new_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags, @@ -4974,13 +4978,13 @@ flags_complete: (changed_flags & I40E_FLAG_BASE_R_FEC)) { u8 fec_cfg = 0; - if (pf->flags & I40E_FLAG_RS_FEC && - pf->flags & I40E_FLAG_BASE_R_FEC) { + if (new_flags & I40E_FLAG_RS_FEC && + new_flags & I40E_FLAG_BASE_R_FEC) { fec_cfg = I40E_AQ_SET_FEC_AUTO; - } else if (pf->flags & I40E_FLAG_RS_FEC) { + } else if (new_flags & I40E_FLAG_RS_FEC) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS | I40E_AQ_SET_FEC_ABILITY_RS); - } else if (pf->flags & I40E_FLAG_BASE_R_FEC) { + } else if (new_flags & I40E_FLAG_BASE_R_FEC) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR | I40E_AQ_SET_FEC_ABILITY_KR); } @@ -4988,14 +4992,14 @@ flags_complete: dev_warn(&pf->pdev->dev, "Cannot change FEC config\n"); } - if ((changed_flags & pf->flags & + if ((changed_flags & new_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && - (pf->flags & I40E_FLAG_MFP_ENABLED)) + (new_flags & I40E_FLAG_MFP_ENABLED)) dev_warn(&pf->pdev->dev, "Turning on link-down-on-close flag may affect other partitions\n"); if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { struct i40e_dcbx_config *dcbcfg; i40e_aq_stop_lldp(&pf->hw, true, false, NULL); @@ -5013,17 +5017,43 @@ flags_complete: dcbcfg->pfc.willing = 1; dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; } else { - i40e_aq_start_lldp(&pf->hw, false, NULL); + status = i40e_aq_start_lldp(&pf->hw, false, NULL); + if (status) { + adq_err = pf->hw.aq.asq_last_status; + switch (adq_err) { + case I40E_AQ_RC_EEXIST: + dev_warn(&pf->pdev->dev, + "FW LLDP agent is already running\n"); + is_reset_needed = false; + break; + case I40E_AQ_RC_EPERM: + dev_warn(&pf->pdev->dev, + "Device configuration forbids SW from starting the LLDP agent.\n"); + return -EINVAL; + default: + dev_warn(&pf->pdev->dev, + "Starting FW LLDP agent failed: error: %s, %s\n", + i40e_stat_str(&pf->hw, + status), + i40e_aq_str(&pf->hw, + adq_err)); + return -EINVAL; + } + } } } + /* Now that we've checked to ensure that the new flags are valid, load + * them into place. 
Since we only modify flags either (a) during + * initialization or (b) while holding the RTNL lock, we don't need + * anything fancy here. + */ + pf->flags = new_flags; + /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset */ - if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | - I40E_FLAG_LEGACY_RX | - I40E_FLAG_SOURCE_PRUNING_DISABLED | - I40E_FLAG_DISABLE_FW_LLDP)) + if (is_reset_needed) i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); return 0; @@ -5181,6 +5211,16 @@ static int i40e_get_module_eeprom(struct net_device *netdev, return 0; } +static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +{ + return -EOPNOTSUPP; +} + +static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata) +{ + return -EOPNOTSUPP; +} + static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = { .set_eeprom = i40e_set_eeprom, .get_eeprom_len = i40e_get_eeprom_len, @@ -5208,6 +5248,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .set_rxnfc = i40e_set_rxnfc, .self_test = i40e_diag_test, .get_strings = i40e_get_strings, + .get_eee = i40e_get_eee, + .set_eee = i40e_set_eee, .set_phys_id = i40e_set_phys_id, .get_sset_count = i40e_get_sset_count, .get_ethtool_stats = i40e_get_ethtool_stats, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 320562b39686..9ebbe3da61bb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -32,7 +32,7 @@ static const char i40e_driver_string[] = __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN const char i40e_driver_version_str[] = DRV_VERSION; -static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation."; +static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation."; /* a bit of forward declarations */ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); @@ -636,9 +636,6 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi) i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unknown_protocol, &es->rx_unknown_protocol); - i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx), - vsi->stat_offsets_loaded, - &oes->tx_errors, &es->tx_errors); i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), @@ -5864,8 +5861,10 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, return -ENOENT; } - /* Success, update channel */ - ch->enabled_tc = enabled_tc; + /* Success, update channel, set enabled_tc only if the channel + * is not a macvlan + */ + ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; ch->seid = ctxt.seid; ch->vsi_number = ctxt.vsi_number; ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); @@ -6413,6 +6412,50 @@ static int i40e_resume_port_tx(struct i40e_pf *pf) } /** + * i40e_update_dcb_config + * @hw: pointer to the HW struct + * @enable_mib_change: enable MIB change event + * + * Update DCB configuration from the firmware + **/ +static enum i40e_status_code +i40e_update_dcb_config(struct i40e_hw *hw, bool enable_mib_change) +{ + struct i40e_lldp_variables lldp_cfg; + i40e_status ret; + + if (!hw->func_caps.dcb) + return I40E_NOT_SUPPORTED; + + /* Read LLDP NVM area */ + ret = i40e_read_lldp_cfg(hw, &lldp_cfg); + if (ret) + return I40E_ERR_NOT_READY; + + /* Get DCBX status */ + ret = i40e_get_dcbx_status(hw, &hw->dcbx_status); + if (ret) + return ret; + + /* Check the DCBX 
Status */ + if (hw->dcbx_status == I40E_DCBX_STATUS_DONE || + hw->dcbx_status == I40E_DCBX_STATUS_IN_PROGRESS) { + /* Get current DCBX configuration */ + ret = i40e_get_dcb_config(hw); + if (ret) + return ret; + } else if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) { + return I40E_ERR_NOT_READY; + } + + /* Configure the LLDP MIB change event */ + if (enable_mib_change) + ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL); + + return ret; +} + +/** * i40e_init_pf_dcb - Initialize DCB configuration * @pf: PF being configured * @@ -6428,11 +6471,13 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) * Also do not enable DCBx if FW LLDP agent is disabled */ if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) || - (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) + (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) { + dev_info(&pf->pdev->dev, "DCB is not supported or FW LLDP is disabled\n"); + err = I40E_NOT_SUPPORTED; goto out; + } - /* Get the initial DCB configuration */ - err = i40e_init_dcb(hw, true); + err = i40e_update_dcb_config(hw, true); if (!err) { /* Device/Function is not DCBX capable */ if ((!hw->func_caps.dcb) || @@ -6869,6 +6914,489 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) } /** + * i40e_del_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function deletes a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. + **/ +static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_remove_macvlan_element_data element; + i40e_status status; + + memset(&element, 0, sizeof(element)); + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; + status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_add_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function adds a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. 
+ **/ +static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_add_macvlan_element_data element; + i40e_status status; + u16 cmd_flags = 0; + + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.queue_number = 0; + element.match_method = I40E_AQC_MM_ERR_NO_RES; + cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; + element.flags = cpu_to_le16(cmd_flags); + status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_reset_ch_rings - Reset the queue contexts in a channel + * @vsi: the VSI we want to access + * @ch: the channel we want to access + */ +static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch) +{ + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + int i; + + for (i = 0; i < ch->num_queue_pairs; i++) { + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } +} + +/** + * i40e_free_macvlan_channels + * @vsi: the VSI we want to access + * + * This function frees the Qs of the channel VSI from + * the stack and also deletes the channel VSIs which + * serve as macvlans. + */ +static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int ret; + + if (list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + struct i40e_vsi *parent_vsi; + + if (i40e_is_channel_macvlan(ch)) { + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + + list_del(&ch->list); + parent_vsi = ch->parent_vsi; + if (!parent_vsi || !ch->initialized) { + kfree(ch); + continue; + } + + /* remove the VSI */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, parent_vsi->seid); + kfree(ch); + } + vsi->macvlan_cnt = 0; +} + +/** + * i40e_fwd_ring_up - bring the macvlan device up + * @vsi: the VSI we want to access + * @vdev: macvlan netdevice + * @fwd: the private fwd structure + */ +static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, + struct i40e_fwd_adapter *fwd) +{ + int ret = 0, num_tc = 1, i, aq_err; + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + if (list_empty(&vsi->macvlan_list)) + return -EINVAL; + + /* Go through the list and find an available channel */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(ch)) { + ch->fwd = fwd; + /* record configuration for macvlan interface in vdev */ + for (i = 0; i < num_tc; i++) + netdev_bind_sb_channel_queue(vsi->netdev, vdev, + i, + ch->num_queue_pairs, + ch->base_queue); + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + + /* Get to TX ring ptr */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = ch; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = ch; + } + break; + } + } + + /* Guarantee all rings are updated before we update the + * MAC address filter. 
+ */ + wmb(); + + /* Add a mac filter */ + ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); + if (ret) { + /* if we cannot add the MAC rule then disable the offload */ + macvlan_release_l2fw_offload(vdev); + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->netdev = NULL; + } + dev_info(&pf->pdev->dev, + "Error adding mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n"); + } + + return ret; +} + +/** + * i40e_setup_macvlans - create the channels which will be macvlans + * @vsi: the VSI we want to access + * @macvlan_cnt: no. of macvlans to be setup + * @qcnt: no. of Qs per macvlan + * @vdev: macvlan netdevice + */ +static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt, + struct net_device *vdev) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + struct i40e_vsi_context ctxt; + u16 sections, qmap, num_qps; + struct i40e_channel *ch; + int i, pow, ret = 0; + u8 offset = 0; + + if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt) + return -EINVAL; + + num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt); + + /* find the next higher power-of-2 of num queue pairs */ + pow = fls(roundup_pow_of_two(num_qps) - 1); + + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup context bits for the main VSI */ + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.seid = vsi->seid; + ctxt.pf_num = vsi->back->hw.pf_id; + ctxt.vf_num = 0; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.info = vsi->info; + ctxt.info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); + ctxt.info.valid_sections |= cpu_to_le16(sections); + + /* Reconfigure RSS for main VSI with new max queue count */ + vsi->rss_size = max_t(u16, num_qps, qcnt); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed to reconfig RSS for num_queues (%u)\n", + vsi->rss_size); + return ret; + } + vsi->reconfig_rss = true; + dev_dbg(&vsi->back->pdev->dev, + "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size); + vsi->next_base_queue = num_qps; + vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps; + + /* Update the VSI after updating the VSI queue-mapping + * information + */ + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Update vsi tc config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + /* Create channels for macvlans */ + INIT_LIST_HEAD(&vsi->macvlan_list); + for (i = 0; i < macvlan_cnt; i++) { + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + INIT_LIST_HEAD(&ch->list); + ch->num_queue_pairs = qcnt; + if (!i40e_setup_channel(pf, vsi, ch)) { + ret = -EINVAL; + goto err_free; + } + ch->parent_vsi = vsi; + vsi->cnt_q_avail -= ch->num_queue_pairs; + vsi->macvlan_cnt++; + list_add_tail(&ch->list, &vsi->macvlan_list); + } + + return ret; + +err_free: + dev_info(&pf->pdev->dev, "Failed to setup macvlans\n"); + 
i40e_free_macvlan_channels(vsi); + + return ret; +} + +/** + * i40e_fwd_add - configure macvlans + * @netdev: net device to configure + * @vdev: macvlan netdevice + **/ +static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_fwd_adapter *fwd; + int avail_macvlan, ret; + + if ((pf->flags & I40E_FLAG_DCB_ENABLED)) { + netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); + return ERR_PTR(-EINVAL); + } + if ((pf->flags & I40E_FLAG_TC_MQPRIO)) { + netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); + return ERR_PTR(-EINVAL); + } + if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) { + netdev_info(netdev, "Not enough vectors available to support macvlans\n"); + return ERR_PTR(-EINVAL); + } + + /* The macvlan device has to be a single Q device so that the + * tc_to_txq field can be reused to pick the tx queue. + */ + if (netif_is_multiqueue(vdev)) + return ERR_PTR(-ERANGE); + + if (!vsi->macvlan_cnt) { + /* reserve bit 0 for the pf device */ + set_bit(0, vsi->fwd_bitmask); + + /* Try to reserve as many queues as possible for macvlans. First + * reserve 3/4th of max vectors, then half, then quarter and + * calculate Qs per macvlan as you go + */ + vectors = pf->num_lan_msix; + if (vectors <= I40E_MAX_MACVLANS && vectors > 64) { + /* allocate 4 Qs per macvlan and 32 Qs to the PF*/ + q_per_macvlan = 4; + macvlan_cnt = (vectors - 32) / 4; + } else if (vectors <= 64 && vectors > 32) { + /* allocate 2 Qs per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 2; + macvlan_cnt = (vectors - 16) / 2; + } else if (vectors <= 32 && vectors > 16) { + /* allocate 1 Q per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 1; + macvlan_cnt = vectors - 16; + } else if (vectors <= 16 && vectors > 8) { + /* allocate 1 Q per macvlan and 8 Qs to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 8; + } else { + /* allocate 1 Q per macvlan and 1 Q to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 1; + } + + if (macvlan_cnt == 0) + return ERR_PTR(-EBUSY); + + /* Quiesce VSI queues */ + i40e_quiesce_vsi(vsi); + + /* sets up the macvlans but does not "enable" them */ + ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan, + vdev); + if (ret) + return ERR_PTR(ret); + + /* Unquiesce VSI */ + i40e_unquiesce_vsi(vsi); + } + avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask, + vsi->macvlan_cnt); + if (avail_macvlan >= I40E_MAX_MACVLANS) + return ERR_PTR(-EBUSY); + + /* create the fwd struct */ + fwd = kzalloc(sizeof(*fwd), GFP_KERNEL); + if (!fwd) + return ERR_PTR(-ENOMEM); + + set_bit(avail_macvlan, vsi->fwd_bitmask); + fwd->bit_no = avail_macvlan; + netdev_set_sb_channel(vdev, avail_macvlan); + fwd->netdev = vdev; + + if (!netif_running(netdev)) + return fwd; + + /* Set fwd ring up */ + ret = i40e_fwd_ring_up(vsi, vdev, fwd); + if (ret) { + /* unbind the queues and drop the subordinate channel config */ + netdev_unbind_sb_channel(netdev, vdev); + netdev_set_sb_channel(vdev, 0); + + kfree(fwd); + return ERR_PTR(-EINVAL); + } + + return fwd; +} + +/** + * i40e_del_all_macvlans - Delete all the mac filters on the channels + * @vsi: the VSI we want to access + */ +static void i40e_del_all_macvlans(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + if 
(list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, + ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + } + } +} + +/** + * i40e_fwd_del - delete macvlan interfaces + * @netdev: net device to configure + * @vdev: macvlan netdevice + */ +static void i40e_fwd_del(struct net_device *netdev, void *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_fwd_adapter *fwd = vdev; + struct i40e_channel *ch, *ch_tmp; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + /* Find the channel associated with the macvlan and del mac filter */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch) && + ether_addr_equal(i40e_channel_mac(ch), + fwd->netdev->dev_addr)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(netdev, fwd->netdev); + netdev_set_sb_channel(fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } else { + dev_info(&pf->pdev->dev, + "Error deleting mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + } + break; + } + } +} + +/** * i40e_setup_tc - configure multiple traffic classes * @netdev: net device to configure * @type_data: tc offload data @@ -6963,6 +7491,10 @@ config_tc: vsi->seid); need_reset = true; goto exit; + } else { + dev_info(&vsi->back->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + vsi->seid, vsi->tc_config.tc_info[0].qcount); } if (pf->flags & I40E_FLAG_TC_MQPRIO) { @@ -7227,15 +7759,15 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, /** * i40e_parse_cls_flower - Parse tc flower filters provided by kernel * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * @filter: Pointer to cloud filter structure * **/ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct i40e_cloud_filter *filter) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; struct i40e_pf *pf = vsi->back; @@ -7469,11 +8001,11 @@ static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc, /** * i40e_configure_clsflower - Configure tc flower filters * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_configure_clsflower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); struct i40e_cloud_filter *filter = NULL; @@ -7565,11 +8097,11 @@ static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi, /** * i40e_delete_clsflower - Remove tc flower filters * @vsi: 
Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_delete_clsflower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct i40e_cloud_filter *filter = NULL; struct i40e_pf *pf = vsi->back; @@ -7612,16 +8144,16 @@ static int i40e_delete_clsflower(struct i40e_vsi *vsi, * @type_data: offload data **/ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct i40e_vsi *vsi = np->vsi; switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return i40e_configure_clsflower(vsi, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return i40e_delete_clsflower(vsi, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -7645,34 +8177,21 @@ static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int i40e_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct i40e_netdev_priv *np = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb, - np, np, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(i40e_block_cb_list); static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { + struct i40e_netdev_priv *np = netdev_priv(netdev); + switch (type) { case TC_SETUP_QDISC_MQPRIO: return i40e_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: - return i40e_setup_tc_block(netdev, type_data); + return flow_block_cb_setup_simple(type_data, + &i40e_block_cb_list, + i40e_setup_tc_block_cb, + np, np, true); default: return -EOPNOTSUPP; } @@ -8570,7 +9089,7 @@ static void i40e_link_event(struct i40e_pf *pf) /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. 
*/ - if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb]) + if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb]) i40e_veb_link_event(pf->veb[pf->lan_veb], new_link); else i40e_vsi_link_event(vsi, new_link); @@ -10031,8 +10550,12 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) switch (vsi->type) { case I40E_VSI_MAIN: vsi->alloc_queue_pairs = pf->num_lan_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_q_vectors = pf->num_lan_msix; else @@ -10042,22 +10565,32 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) case I40E_VSI_FDIR: vsi->alloc_queue_pairs = 1; - vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT, - I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_fdsb_msix; break; case I40E_VSI_VMDQ2: vsi->alloc_queue_pairs = pf->num_vmdq_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_vmdq_msix; break; case I40E_VSI_SRIOV: vsi->alloc_queue_pairs = pf->num_vf_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); break; default: @@ -10333,7 +10866,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) @@ -10350,7 +10883,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->vsi = vsi; ring->netdev = NULL; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) @@ -10366,7 +10899,7 @@ setup_rx: ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_rx_desc; ring->size = 0; ring->dcb_tc = 0; ring->itr_setting = pf->rx_itr_default; @@ -11604,6 +12137,9 @@ static int i40e_set_features(struct net_device *netdev, return -EINVAL; } + if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt) + i40e_del_all_macvlans(vsi); + need_reset = i40e_set_ntuple(pf, features); if (need_reset) @@ -12348,6 +12884,8 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, .ndo_xsk_async_xmit = i40e_xsk_async_xmit, + .ndo_dfwd_add_station = i40e_fwd_add, + .ndo_dfwd_del_station = i40e_fwd_del, }; /** @@ -12407,6 +12945,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | 
@@ -12407,6 +12945,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	/* record features VLANs can make use of */
 	netdev->vlan_features |= hw_enc_features |
 				 NETIF_F_TSO_MANGLEID;
 
+	/* enable macvlan offloads */
+	netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
+
 	hw_features = hw_enc_features		|
 		      NETIF_F_HW_VLAN_CTAG_TX	|
 		      NETIF_F_HW_VLAN_CTAG_RX;
@@ -12519,7 +13060,7 @@ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
 	struct i40e_pf *pf = vsi->back;
 
 	/* Uplink is not a bridge so default to VEB */
-	if (vsi->veb_idx == I40E_NO_VEB)
+	if (vsi->veb_idx >= I40E_MAX_VEB)
 		return 1;
 
 	veb = pf->veb[vsi->veb_idx];
@@ -13577,7 +14118,7 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
 		/* Main VEB? */
 		if (uplink_seid != pf->mac_seid)
 			break;
-		if (pf->lan_veb == I40E_NO_VEB) {
+		if (pf->lan_veb >= I40E_MAX_VEB) {
 			int v;
 
 			/* find existing or else empty VEB */
@@ -13587,13 +14128,15 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
 					break;
 				}
 			}
-			if (pf->lan_veb == I40E_NO_VEB) {
+			if (pf->lan_veb >= I40E_MAX_VEB) {
 				v = i40e_veb_mem_alloc(pf);
 				if (v < 0)
 					break;
 				pf->lan_veb = v;
 			}
 		}
+		if (pf->lan_veb >= I40E_MAX_VEB)
+			break;
 
 		pf->veb[pf->lan_veb]->seid = seid;
 		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
@@ -13747,7 +14290,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 	/* Set up the PF VSI associated with the PF's main VSI
 	 * that is already in the HW switch
 	 */
-	if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb])
+	if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
 		uplink_seid = pf->veb[pf->lan_veb]->seid;
 	else
 		uplink_seid = pf->mac_seid;
@@ -14203,7 +14746,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
 				I40E_MAX_CSR_SPACE);
-
+	/* We believe that the highest register to read is
+	 * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size
+	 * is not less than that before mapping to prevent a
+	 * kernel panic.
+	 */
+	if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
+		dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
+			pf->ioremap_len);
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
 	hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
 	if (!hw->hw_addr) {
 		err = -EIO;
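
The probe hunk above guards ioremap() with a BAR-size check so that a malformed or misbehaving device (for example, a passthrough device exposing an undersized BAR0) cannot make the driver read past the end of the mapping. The generic shape of that guard, with FOO_MAX_REG_OFFSET as a hypothetical placeholder for the highest register offset a driver dereferences:

	#include <linux/pci.h>
	#include <linux/io.h>

	#define FOO_MAX_REG_OFFSET	0x2c02c	/* hypothetical highest CSR */

	static void __iomem *foo_map_bar0(struct pci_dev *pdev)
	{
		resource_size_t len = pci_resource_len(pdev, 0);

		/* the last readl()/writel() touches 4 bytes at the offset */
		if (len < FOO_MAX_REG_OFFSET + sizeof(u32))
			return NULL;	/* undersized BAR, refuse to map */

		return ioremap(pci_resource_start(pdev, 0), len);
	}
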
+ "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); + /* Enable FW to write default DCB config on link-up */ i40e_aq_set_dcb_parameters(hw, true, NULL); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 882627073dce..eac88bcc6c06 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -350,6 +350,10 @@ i40e_virtchnl_link_speed(enum i40e_aq_link_speed link_speed) return VIRTCHNL_LINK_SPEED_100MB; case I40E_LINK_SPEED_1GB: return VIRTCHNL_LINK_SPEED_1GB; + case I40E_LINK_SPEED_2_5GB: + return VIRTCHNL_LINK_SPEED_2_5GB; + case I40E_LINK_SPEED_5GB: + return VIRTCHNL_LINK_SPEED_5GB; case I40E_LINK_SPEED_10GB: return VIRTCHNL_LINK_SPEED_10GB; case I40E_LINK_SPEED_40GB: diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 439c35f0c581..11394a52e21c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -140,8 +140,7 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) * @ptp: The PTP clock structure * @delta: Offset in nanoseconds to adjust the PHC time by * - * Adjust the frequency of the PHC by the indicated parts per billion from the - * base frequency. + * Adjust the current clock time by a delta specified in nanoseconds. **/ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 20a283702c9f..2a2fe3ec7926 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -774,7 +774,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, int napi_budget) { - u16 i = tx_ring->next_to_clean; + int i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; struct i40e_tx_desc *tx_head; struct i40e_tx_desc *tx_desc; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 479bc60c8f71..02b09a8ad54c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -440,7 +440,7 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, struct virtchnl_iwarp_qv_info *qv_info; u32 v_idx, i, reg_idx, reg; u32 next_q_idx, next_q_type; - u32 msix_vf, size; + u32 msix_vf; int ret = 0; msix_vf = pf->hw.func_caps.num_msix_vectors_vf; @@ -454,11 +454,10 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, goto err_out; } - size = sizeof(struct virtchnl_iwarp_qvlist_info) + - (sizeof(struct virtchnl_iwarp_qv_info) * - (qvlist_info->num_vectors - 1)); kfree(vf->qvlist_info); - vf->qvlist_info = kzalloc(size, GFP_KERNEL); + vf->qvlist_info = kzalloc(struct_size(vf->qvlist_info, qv_info, + qvlist_info->num_vectors - 1), + GFP_KERNEL); if (!vf->qvlist_info) { ret = -ENOMEM; goto err_out; @@ -470,14 +469,15 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, qv_info = &qvlist_info->qv_info[i]; if (!qv_info) continue; - v_idx = qv_info->v_idx; /* Validate vector id belongs to this vf */ - if (!i40e_vc_isvalid_vector_id(vf, v_idx)) { + if (!i40e_vc_isvalid_vector_id(vf, qv_info->v_idx)) { ret = -EINVAL; goto err_free; } + v_idx = qv_info->v_idx; + vf->qvlist_info->qv_info[i] = *qv_info; reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1); @@ -1845,7 +1845,7 @@ static int 
@@ -1845,7 +1845,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	i40e_status aq_ret = 0;
 	struct i40e_vsi *vsi;
 	int num_vsis = 1;
-	int len = 0;
+	size_t len = 0;
 	int ret;
 
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
@@ -1853,9 +1853,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 		goto err;
 	}
 
-	len = (sizeof(struct virtchnl_vf_resource) +
-	       sizeof(struct virtchnl_vsi_resource) * num_vsis);
-
+	len = struct_size(vfres, vsi_res, num_vsis);
 	vfres = kzalloc(len, GFP_KERNEL);
 	if (!vfres) {
 		aq_ret = I40E_ERR_NO_MEMORY;
@@ -2135,8 +2133,13 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
 			}
 		}
 
-		if (vf->adq_enabled)
+		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = I40E_ERR_NO_AVAILABLE_VSI;
+				goto error_param;
+			}
 			vsi_id = vf->ch[idx].vsi_id;
+		}
 
 		if (i40e_config_vsi_rx_queue(vf, vsi_id, vsi_queue_id,
 					     &qpi->rxq) ||
@@ -2152,6 +2155,10 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
 		 * to its appropriate VSIs based on TC mapping
 		 **/
 		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = I40E_ERR_NO_AVAILABLE_VSI;
+				goto error_param;
+			}
 			if (j == (vf->ch[idx].num_qps - 1)) {
 				idx++;
 				j = 0; /* resetting the queue count */
@@ -2318,7 +2325,6 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
 	struct virtchnl_queue_select *vqs =
 	    (struct virtchnl_queue_select *)msg;
 	struct i40e_pf *pf = vf->pf;
-	u16 vsi_id = vqs->vsi_id;
 	i40e_status aq_ret = 0;
 	int i;
 
@@ -2327,7 +2333,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2427,18 +2433,14 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_vf_res_request *vfres =
 		(struct virtchnl_vf_res_request *)msg;
-	int req_pairs = vfres->num_queue_pairs;
-	int cur_pairs = vf->num_queue_pairs;
+	u16 req_pairs = vfres->num_queue_pairs;
+	u8 cur_pairs = vf->num_queue_pairs;
 	struct i40e_pf *pf = vf->pf;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
 		return -EINVAL;
 
-	if (req_pairs <= 0) {
-		dev_err(&pf->pdev->dev,
-			"VF %d tried to request %d queues. 
Ignoring.\n", - vf->vf_id, req_pairs); - } else if (req_pairs > I40E_MAX_VF_QUEUES) { + if (req_pairs > I40E_MAX_VF_QUEUES) { dev_err(&pf->pdev->dev, "VF %d tried to request more than %d queues.\n", vf->vf_id, @@ -2509,7 +2511,7 @@ error_param: * MAC filters: 16 for multicast, 1 for MAC, 1 for broadcast */ #define I40E_VC_MAX_MAC_ADDR_PER_VF (16 + 1 + 1) -#define I40E_VC_MAX_VLAN_PER_VF 8 +#define I40E_VC_MAX_VLAN_PER_VF 16 /** * i40e_check_vf_permission @@ -2587,12 +2589,11 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_ether_addr_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = al->vsi_id; i40e_status ret = 0; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; } @@ -2657,12 +2658,11 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_ether_addr_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = al->vsi_id; i40e_status ret = 0; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; } @@ -2726,7 +2726,6 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_vlan_filter_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vfl->vsi_id; i40e_status aq_ret = 0; int i; @@ -2737,7 +2736,7 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg) goto error_param; } if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2798,12 +2797,11 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_vlan_filter_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vfl->vsi_id; i40e_status aq_ret = 0; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2920,11 +2918,10 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) (struct virtchnl_rss_key *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vrk->vsi_id; i40e_status aq_ret = 0; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id) || + !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) || (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { aq_ret = I40E_ERR_PARAM; goto err; @@ -2951,16 +2948,22 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) (struct virtchnl_rss_lut *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vrl->vsi_id; i40e_status aq_ret = 0; + u16 i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id) || + !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) || (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { aq_ret = I40E_ERR_PARAM; goto err; } + for (i = 0; i < vrl->lut_entries; i++) + if (vrl->lut[i] >= vf->num_queue_pairs) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + vsi = pf->vsi[vf->lan_vsi_idx]; aq_ret = i40e_config_rss(vsi, NULL, vrl->lut, I40E_VF_HLUT_ARRAY_SIZE); /* send the response to the VF */ @@ -3041,14 +3044,15 @@ err: **/ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 
*msg) { - struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_status aq_ret = 0; + struct i40e_vsi *vsi; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto err; } + vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_vlan_stripping_enable(vsi); /* send the response to the VF */ @@ -3066,14 +3070,15 @@ err: **/ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) { - struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_status aq_ret = 0; + struct i40e_vsi *vsi; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto err; } + vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_vlan_stripping_disable(vsi); /* send the response to the VF */ @@ -3531,8 +3536,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_tc_info *)msg; struct i40e_pf *pf = vf->pf; struct i40e_link_status *ls = &pf->hw.phy.link_info; - int i, adq_request_qps = 0, speed = 0; + int i, adq_request_qps = 0; i40e_status aq_ret = 0; + u64 speed = 0; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; @@ -3558,8 +3564,8 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) /* max number of traffic classes for VF currently capped at 4 */ if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) { dev_err(&pf->pdev->dev, - "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n", - vf->vf_id, tci->num_tc); + "VF %d trying to set %u TCs, valid range 1-%u TCs per VF\n", + vf->vf_id, tci->num_tc, I40E_MAX_VF_VSI); aq_ret = I40E_ERR_PARAM; goto err; } @@ -3569,8 +3575,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) if (!tci->list[i].count || tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) { dev_err(&pf->pdev->dev, - "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n", - vf->vf_id, i, tci->list[i].count); + "VF %d: TC %d trying to set %u queues, valid range 1-%u queues per TC\n", + vf->vf_id, i, tci->list[i].count, + I40E_DEFAULT_QUEUES_PER_VF); aq_ret = I40E_ERR_PARAM; goto err; } @@ -3730,19 +3737,6 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, /* perform basic checks on the msg */ ret = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen); - /* perform additional checks specific to this driver */ - if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) { - struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; - - if (vrk->key_len != I40E_HKEY_ARRAY_SIZE) - ret = -EINVAL; - } else if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_LUT) { - struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; - - if (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) - ret = -EINVAL; - } - if (ret) { i40e_vc_send_resp_to_vf(vf, v_opcode, I40E_ERR_PARAM); dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n", @@ -3943,6 +3937,11 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) int bkt; u8 i; + if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) { + dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n"); + return -EAGAIN; + } + /* validate the request */ ret = i40e_validate_vf(pf, vf_id); if (ret) @@ -3967,11 +3966,6 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto error_param; } - if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) { - dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n"); - return -EAGAIN; - } - if (is_multicast_ether_addr(mac)) { dev_err(&pf->pdev->dev, "Invalid Ethernet address %pM for VF 
%d\n", mac, vf_id); @@ -4302,10 +4296,8 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, vf = &pf->vf[vf_id]; /* first vsi is always the LAN vsi */ vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { - dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", - vf_id); - ret = -EAGAIN; + if (!vsi) { + ret = -ENOENT; goto error_param; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 1b17486543ac..32bad014d76c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -215,6 +215,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) break; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); /* fallthrough -- handle aborts by dropping packet */ @@ -640,8 +641,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) struct i40e_tx_desc *tx_desc = NULL; struct i40e_tx_buffer *tx_bi; bool work_done = true; + struct xdp_desc desc; dma_addr_t dma; - u32 len; while (budget-- > 0) { if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { @@ -650,21 +651,23 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) break; } - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len)) + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) break; - dma_sync_single_for_device(xdp_ring->dev, dma, len, + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, DMA_BIDIRECTIONAL); tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use]; - tx_bi->bytecount = len; + tx_bi->bytecount = desc.len; tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use); tx_desc->buffer_addr = cpu_to_le64(dma); tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC | I40E_TX_DESC_CMD_EOP, - 0, len, 0); + 0, desc.len, 0); xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile index 9cbb5743ed12..c997063ed728 100644 --- a/drivers/net/ethernet/intel/iavf/Makefile +++ b/drivers/net/ethernet/intel/iavf/Makefile @@ -12,4 +12,4 @@ subdir-ccflags-y += -I$(src) obj-$(CONFIG_IAVF) += iavf.o iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o \ - iavf_txrx.o iavf_common.o i40e_adminq.o iavf_client.o + iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o diff --git a/drivers/net/ethernet/intel/iavf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/iavf/i40e_adminq_cmd.h deleted file mode 100644 index e5ae4a1c0cff..000000000000 --- a/drivers/net/ethernet/intel/iavf/i40e_adminq_cmd.h +++ /dev/null @@ -1,530 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ - -#ifndef _I40E_ADMINQ_CMD_H_ -#define _I40E_ADMINQ_CMD_H_ - -/* This header file defines the i40e Admin Queue commands and is shared between - * i40e Firmware and Software. Do not change the names in this file to IAVF - * because this file should be diff-able against the i40e version, even - * though many parts have been removed in this VF version. - * - * This file needs to comply with the Linux Kernel coding style. - */ - -#define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR_X722 0x0005 -#define I40E_FW_API_VERSION_MINOR_X710 0x0008 - -#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? 
\ - I40E_FW_API_VERSION_MINOR_X710 : \ - I40E_FW_API_VERSION_MINOR_X722) - -/* API version 1.7 implements additional link and PHY-specific APIs */ -#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 - -struct i40e_aq_desc { - __le16 flags; - __le16 opcode; - __le16 datalen; - __le16 retval; - __le32 cookie_high; - __le32 cookie_low; - union { - struct { - __le32 param0; - __le32 param1; - __le32 param2; - __le32 param3; - } internal; - struct { - __le32 param0; - __le32 param1; - __le32 addr_high; - __le32 addr_low; - } external; - u8 raw[16]; - } params; -}; - -/* Flags sub-structure - * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | - * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE | - */ - -/* command flags and offsets*/ -#define I40E_AQ_FLAG_DD_SHIFT 0 -#define I40E_AQ_FLAG_CMP_SHIFT 1 -#define I40E_AQ_FLAG_ERR_SHIFT 2 -#define I40E_AQ_FLAG_VFE_SHIFT 3 -#define I40E_AQ_FLAG_LB_SHIFT 9 -#define I40E_AQ_FLAG_RD_SHIFT 10 -#define I40E_AQ_FLAG_VFC_SHIFT 11 -#define I40E_AQ_FLAG_BUF_SHIFT 12 -#define I40E_AQ_FLAG_SI_SHIFT 13 -#define I40E_AQ_FLAG_EI_SHIFT 14 -#define I40E_AQ_FLAG_FE_SHIFT 15 - -#define I40E_AQ_FLAG_DD BIT(I40E_AQ_FLAG_DD_SHIFT) /* 0x1 */ -#define I40E_AQ_FLAG_CMP BIT(I40E_AQ_FLAG_CMP_SHIFT) /* 0x2 */ -#define I40E_AQ_FLAG_ERR BIT(I40E_AQ_FLAG_ERR_SHIFT) /* 0x4 */ -#define I40E_AQ_FLAG_VFE BIT(I40E_AQ_FLAG_VFE_SHIFT) /* 0x8 */ -#define I40E_AQ_FLAG_LB BIT(I40E_AQ_FLAG_LB_SHIFT) /* 0x200 */ -#define I40E_AQ_FLAG_RD BIT(I40E_AQ_FLAG_RD_SHIFT) /* 0x400 */ -#define I40E_AQ_FLAG_VFC BIT(I40E_AQ_FLAG_VFC_SHIFT) /* 0x800 */ -#define I40E_AQ_FLAG_BUF BIT(I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ -#define I40E_AQ_FLAG_SI BIT(I40E_AQ_FLAG_SI_SHIFT) /* 0x2000 */ -#define I40E_AQ_FLAG_EI BIT(I40E_AQ_FLAG_EI_SHIFT) /* 0x4000 */ -#define I40E_AQ_FLAG_FE BIT(I40E_AQ_FLAG_FE_SHIFT) /* 0x8000 */ - -/* error codes */ -enum i40e_admin_queue_err { - I40E_AQ_RC_OK = 0, /* success */ - I40E_AQ_RC_EPERM = 1, /* Operation not permitted */ - I40E_AQ_RC_ENOENT = 2, /* No such element */ - I40E_AQ_RC_ESRCH = 3, /* Bad opcode */ - I40E_AQ_RC_EINTR = 4, /* operation interrupted */ - I40E_AQ_RC_EIO = 5, /* I/O error */ - I40E_AQ_RC_ENXIO = 6, /* No such resource */ - I40E_AQ_RC_E2BIG = 7, /* Arg too long */ - I40E_AQ_RC_EAGAIN = 8, /* Try again */ - I40E_AQ_RC_ENOMEM = 9, /* Out of memory */ - I40E_AQ_RC_EACCES = 10, /* Permission denied */ - I40E_AQ_RC_EFAULT = 11, /* Bad address */ - I40E_AQ_RC_EBUSY = 12, /* Device or resource busy */ - I40E_AQ_RC_EEXIST = 13, /* object already exists */ - I40E_AQ_RC_EINVAL = 14, /* Invalid argument */ - I40E_AQ_RC_ENOTTY = 15, /* Not a typewriter */ - I40E_AQ_RC_ENOSPC = 16, /* No space left or alloc failure */ - I40E_AQ_RC_ENOSYS = 17, /* Function not implemented */ - I40E_AQ_RC_ERANGE = 18, /* Parameter out of range */ - I40E_AQ_RC_EFLUSHED = 19, /* Cmd flushed due to prev cmd error */ - I40E_AQ_RC_BAD_ADDR = 20, /* Descriptor contains a bad pointer */ - I40E_AQ_RC_EMODE = 21, /* Op not allowed in current dev mode */ - I40E_AQ_RC_EFBIG = 22, /* File too large */ -}; - -/* Admin Queue command opcodes */ -enum i40e_admin_queue_opc { - /* aq commands */ - i40e_aqc_opc_get_version = 0x0001, - i40e_aqc_opc_driver_version = 0x0002, - i40e_aqc_opc_queue_shutdown = 0x0003, - i40e_aqc_opc_set_pf_context = 0x0004, - - /* resource ownership */ - i40e_aqc_opc_request_resource = 0x0008, - i40e_aqc_opc_release_resource = 0x0009, - - i40e_aqc_opc_list_func_capabilities = 0x000A, - i40e_aqc_opc_list_dev_capabilities = 0x000B, - - /* Proxy commands */ - 
i40e_aqc_opc_set_proxy_config = 0x0104, - i40e_aqc_opc_set_ns_proxy_table_entry = 0x0105, - - /* LAA */ - i40e_aqc_opc_mac_address_read = 0x0107, - i40e_aqc_opc_mac_address_write = 0x0108, - - /* PXE */ - i40e_aqc_opc_clear_pxe_mode = 0x0110, - - /* WoL commands */ - i40e_aqc_opc_set_wol_filter = 0x0120, - i40e_aqc_opc_get_wake_reason = 0x0121, - - /* internal switch commands */ - i40e_aqc_opc_get_switch_config = 0x0200, - i40e_aqc_opc_add_statistics = 0x0201, - i40e_aqc_opc_remove_statistics = 0x0202, - i40e_aqc_opc_set_port_parameters = 0x0203, - i40e_aqc_opc_get_switch_resource_alloc = 0x0204, - i40e_aqc_opc_set_switch_config = 0x0205, - i40e_aqc_opc_rx_ctl_reg_read = 0x0206, - i40e_aqc_opc_rx_ctl_reg_write = 0x0207, - - i40e_aqc_opc_add_vsi = 0x0210, - i40e_aqc_opc_update_vsi_parameters = 0x0211, - i40e_aqc_opc_get_vsi_parameters = 0x0212, - - i40e_aqc_opc_add_pv = 0x0220, - i40e_aqc_opc_update_pv_parameters = 0x0221, - i40e_aqc_opc_get_pv_parameters = 0x0222, - - i40e_aqc_opc_add_veb = 0x0230, - i40e_aqc_opc_update_veb_parameters = 0x0231, - i40e_aqc_opc_get_veb_parameters = 0x0232, - - i40e_aqc_opc_delete_element = 0x0243, - - i40e_aqc_opc_add_macvlan = 0x0250, - i40e_aqc_opc_remove_macvlan = 0x0251, - i40e_aqc_opc_add_vlan = 0x0252, - i40e_aqc_opc_remove_vlan = 0x0253, - i40e_aqc_opc_set_vsi_promiscuous_modes = 0x0254, - i40e_aqc_opc_add_tag = 0x0255, - i40e_aqc_opc_remove_tag = 0x0256, - i40e_aqc_opc_add_multicast_etag = 0x0257, - i40e_aqc_opc_remove_multicast_etag = 0x0258, - i40e_aqc_opc_update_tag = 0x0259, - i40e_aqc_opc_add_control_packet_filter = 0x025A, - i40e_aqc_opc_remove_control_packet_filter = 0x025B, - i40e_aqc_opc_add_cloud_filters = 0x025C, - i40e_aqc_opc_remove_cloud_filters = 0x025D, - i40e_aqc_opc_clear_wol_switch_filters = 0x025E, - - i40e_aqc_opc_add_mirror_rule = 0x0260, - i40e_aqc_opc_delete_mirror_rule = 0x0261, - - /* Dynamic Device Personalization */ - i40e_aqc_opc_write_personalization_profile = 0x0270, - i40e_aqc_opc_get_personalization_profile_list = 0x0271, - - /* DCB commands */ - i40e_aqc_opc_dcb_ignore_pfc = 0x0301, - i40e_aqc_opc_dcb_updated = 0x0302, - i40e_aqc_opc_set_dcb_parameters = 0x0303, - - /* TX scheduler */ - i40e_aqc_opc_configure_vsi_bw_limit = 0x0400, - i40e_aqc_opc_configure_vsi_ets_sla_bw_limit = 0x0406, - i40e_aqc_opc_configure_vsi_tc_bw = 0x0407, - i40e_aqc_opc_query_vsi_bw_config = 0x0408, - i40e_aqc_opc_query_vsi_ets_sla_config = 0x040A, - i40e_aqc_opc_configure_switching_comp_bw_limit = 0x0410, - - i40e_aqc_opc_enable_switching_comp_ets = 0x0413, - i40e_aqc_opc_modify_switching_comp_ets = 0x0414, - i40e_aqc_opc_disable_switching_comp_ets = 0x0415, - i40e_aqc_opc_configure_switching_comp_ets_bw_limit = 0x0416, - i40e_aqc_opc_configure_switching_comp_bw_config = 0x0417, - i40e_aqc_opc_query_switching_comp_ets_config = 0x0418, - i40e_aqc_opc_query_port_ets_config = 0x0419, - i40e_aqc_opc_query_switching_comp_bw_config = 0x041A, - i40e_aqc_opc_suspend_port_tx = 0x041B, - i40e_aqc_opc_resume_port_tx = 0x041C, - i40e_aqc_opc_configure_partition_bw = 0x041D, - /* hmc */ - i40e_aqc_opc_query_hmc_resource_profile = 0x0500, - i40e_aqc_opc_set_hmc_resource_profile = 0x0501, - - /* phy commands*/ - i40e_aqc_opc_get_phy_abilities = 0x0600, - i40e_aqc_opc_set_phy_config = 0x0601, - i40e_aqc_opc_set_mac_config = 0x0603, - i40e_aqc_opc_set_link_restart_an = 0x0605, - i40e_aqc_opc_get_link_status = 0x0607, - i40e_aqc_opc_set_phy_int_mask = 0x0613, - i40e_aqc_opc_get_local_advt_reg = 0x0614, - i40e_aqc_opc_set_local_advt_reg = 0x0615, - 
i40e_aqc_opc_get_partner_advt = 0x0616, - i40e_aqc_opc_set_lb_modes = 0x0618, - i40e_aqc_opc_get_phy_wol_caps = 0x0621, - i40e_aqc_opc_set_phy_debug = 0x0622, - i40e_aqc_opc_upload_ext_phy_fm = 0x0625, - i40e_aqc_opc_run_phy_activity = 0x0626, - i40e_aqc_opc_set_phy_register = 0x0628, - i40e_aqc_opc_get_phy_register = 0x0629, - - /* NVM commands */ - i40e_aqc_opc_nvm_read = 0x0701, - i40e_aqc_opc_nvm_erase = 0x0702, - i40e_aqc_opc_nvm_update = 0x0703, - i40e_aqc_opc_nvm_config_read = 0x0704, - i40e_aqc_opc_nvm_config_write = 0x0705, - i40e_aqc_opc_oem_post_update = 0x0720, - i40e_aqc_opc_thermal_sensor = 0x0721, - - /* virtualization commands */ - i40e_aqc_opc_send_msg_to_pf = 0x0801, - i40e_aqc_opc_send_msg_to_vf = 0x0802, - i40e_aqc_opc_send_msg_to_peer = 0x0803, - - /* alternate structure */ - i40e_aqc_opc_alternate_write = 0x0900, - i40e_aqc_opc_alternate_write_indirect = 0x0901, - i40e_aqc_opc_alternate_read = 0x0902, - i40e_aqc_opc_alternate_read_indirect = 0x0903, - i40e_aqc_opc_alternate_write_done = 0x0904, - i40e_aqc_opc_alternate_set_mode = 0x0905, - i40e_aqc_opc_alternate_clear_port = 0x0906, - - /* LLDP commands */ - i40e_aqc_opc_lldp_get_mib = 0x0A00, - i40e_aqc_opc_lldp_update_mib = 0x0A01, - i40e_aqc_opc_lldp_add_tlv = 0x0A02, - i40e_aqc_opc_lldp_update_tlv = 0x0A03, - i40e_aqc_opc_lldp_delete_tlv = 0x0A04, - i40e_aqc_opc_lldp_stop = 0x0A05, - i40e_aqc_opc_lldp_start = 0x0A06, - - /* Tunnel commands */ - i40e_aqc_opc_add_udp_tunnel = 0x0B00, - i40e_aqc_opc_del_udp_tunnel = 0x0B01, - i40e_aqc_opc_set_rss_key = 0x0B02, - i40e_aqc_opc_set_rss_lut = 0x0B03, - i40e_aqc_opc_get_rss_key = 0x0B04, - i40e_aqc_opc_get_rss_lut = 0x0B05, - - /* Async Events */ - i40e_aqc_opc_event_lan_overflow = 0x1001, - - /* OEM commands */ - i40e_aqc_opc_oem_parameter_change = 0xFE00, - i40e_aqc_opc_oem_device_status_change = 0xFE01, - i40e_aqc_opc_oem_ocsd_initialize = 0xFE02, - i40e_aqc_opc_oem_ocbb_initialize = 0xFE03, - - /* debug commands */ - i40e_aqc_opc_debug_read_reg = 0xFF03, - i40e_aqc_opc_debug_write_reg = 0xFF04, - i40e_aqc_opc_debug_modify_reg = 0xFF07, - i40e_aqc_opc_debug_dump_internals = 0xFF08, -}; - -/* command structures and indirect data structures */ - -/* Structure naming conventions: - * - no suffix for direct command descriptor structures - * - _data for indirect sent data - * - _resp for indirect return data (data which is both will use _data) - * - _completion for direct return data - * - _element_ for repeated elements (may also be _data or _resp) - * - * Command structures are expected to overlay the params.raw member of the basic - * descriptor, and as such cannot exceed 16 bytes in length. - */ - -/* This macro is used to generate a compilation error if a structure - * is not exactly the correct length. It gives a divide by zero error if the - * structure is not of the correct size, otherwise it creates an enum that is - * never used. - */ -#define I40E_CHECK_STRUCT_LEN(n, X) enum i40e_static_assert_enum_##X \ - { i40e_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } - -/* This macro is used extensively to ensure that command structures are 16 - * bytes in length as they have to map to the raw array of that size. 
- */ -#define I40E_CHECK_CMD_LENGTH(X) I40E_CHECK_STRUCT_LEN(16, X) - -/* Queue Shutdown (direct 0x0003) */ -struct i40e_aqc_queue_shutdown { - __le32 driver_unloading; -#define I40E_AQ_DRIVER_UNLOADING 0x1 - u8 reserved[12]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown); - -struct i40e_aqc_vsi_properties_data { - /* first 96 byte are written by SW */ - __le16 valid_sections; -#define I40E_AQ_VSI_PROP_SWITCH_VALID 0x0001 -#define I40E_AQ_VSI_PROP_SECURITY_VALID 0x0002 -#define I40E_AQ_VSI_PROP_VLAN_VALID 0x0004 -#define I40E_AQ_VSI_PROP_CAS_PV_VALID 0x0008 -#define I40E_AQ_VSI_PROP_INGRESS_UP_VALID 0x0010 -#define I40E_AQ_VSI_PROP_EGRESS_UP_VALID 0x0020 -#define I40E_AQ_VSI_PROP_QUEUE_MAP_VALID 0x0040 -#define I40E_AQ_VSI_PROP_QUEUE_OPT_VALID 0x0080 -#define I40E_AQ_VSI_PROP_OUTER_UP_VALID 0x0100 -#define I40E_AQ_VSI_PROP_SCHED_VALID 0x0200 - /* switch section */ - __le16 switch_id; /* 12bit id combined with flags below */ -#define I40E_AQ_VSI_SW_ID_SHIFT 0x0000 -#define I40E_AQ_VSI_SW_ID_MASK (0xFFF << I40E_AQ_VSI_SW_ID_SHIFT) -#define I40E_AQ_VSI_SW_ID_FLAG_NOT_STAG 0x1000 -#define I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB 0x2000 -#define I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB 0x4000 - u8 sw_reserved[2]; - /* security section */ - u8 sec_flags; -#define I40E_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD 0x01 -#define I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK 0x02 -#define I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK 0x04 - u8 sec_reserved; - /* VLAN section */ - __le16 pvid; /* VLANS include priority bits */ - __le16 fcoe_pvid; - u8 port_vlan_flags; -#define I40E_AQ_VSI_PVLAN_MODE_SHIFT 0x00 -#define I40E_AQ_VSI_PVLAN_MODE_MASK (0x03 << \ - I40E_AQ_VSI_PVLAN_MODE_SHIFT) -#define I40E_AQ_VSI_PVLAN_MODE_TAGGED 0x01 -#define I40E_AQ_VSI_PVLAN_MODE_UNTAGGED 0x02 -#define I40E_AQ_VSI_PVLAN_MODE_ALL 0x03 -#define I40E_AQ_VSI_PVLAN_INSERT_PVID 0x04 -#define I40E_AQ_VSI_PVLAN_EMOD_SHIFT 0x03 -#define I40E_AQ_VSI_PVLAN_EMOD_MASK (0x3 << \ - I40E_AQ_VSI_PVLAN_EMOD_SHIFT) -#define I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH 0x0 -#define I40E_AQ_VSI_PVLAN_EMOD_STR_UP 0x08 -#define I40E_AQ_VSI_PVLAN_EMOD_STR 0x10 -#define I40E_AQ_VSI_PVLAN_EMOD_NOTHING 0x18 - u8 pvlan_reserved[3]; - /* ingress egress up sections */ - __le32 ingress_table; /* bitmap, 3 bits per up */ -#define I40E_AQ_VSI_UP_TABLE_UP0_SHIFT 0 -#define I40E_AQ_VSI_UP_TABLE_UP0_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP0_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP1_SHIFT 3 -#define I40E_AQ_VSI_UP_TABLE_UP1_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP1_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP2_SHIFT 6 -#define I40E_AQ_VSI_UP_TABLE_UP2_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP2_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP3_SHIFT 9 -#define I40E_AQ_VSI_UP_TABLE_UP3_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP3_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP4_SHIFT 12 -#define I40E_AQ_VSI_UP_TABLE_UP4_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP4_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP5_SHIFT 15 -#define I40E_AQ_VSI_UP_TABLE_UP5_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP5_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP6_SHIFT 18 -#define I40E_AQ_VSI_UP_TABLE_UP6_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP6_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP7_SHIFT 21 -#define I40E_AQ_VSI_UP_TABLE_UP7_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP7_SHIFT) - __le32 egress_table; /* same defines as for ingress table */ - /* cascaded PV section */ - __le16 cas_pv_tag; - u8 cas_pv_flags; -#define I40E_AQ_VSI_CAS_PV_TAGX_SHIFT 0x00 -#define I40E_AQ_VSI_CAS_PV_TAGX_MASK (0x03 << \ - I40E_AQ_VSI_CAS_PV_TAGX_SHIFT) -#define I40E_AQ_VSI_CAS_PV_TAGX_LEAVE 0x00 
-#define I40E_AQ_VSI_CAS_PV_TAGX_REMOVE 0x01 -#define I40E_AQ_VSI_CAS_PV_TAGX_COPY 0x02 -#define I40E_AQ_VSI_CAS_PV_INSERT_TAG 0x10 -#define I40E_AQ_VSI_CAS_PV_ETAG_PRUNE 0x20 -#define I40E_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG 0x40 - u8 cas_pv_reserved; - /* queue mapping section */ - __le16 mapping_flags; -#define I40E_AQ_VSI_QUE_MAP_CONTIG 0x0 -#define I40E_AQ_VSI_QUE_MAP_NONCONTIG 0x1 - __le16 queue_mapping[16]; -#define I40E_AQ_VSI_QUEUE_SHIFT 0x0 -#define I40E_AQ_VSI_QUEUE_MASK (0x7FF << I40E_AQ_VSI_QUEUE_SHIFT) - __le16 tc_mapping[8]; -#define I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT 0 -#define I40E_AQ_VSI_TC_QUE_OFFSET_MASK (0x1FF << \ - I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) -#define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT 9 -#define I40E_AQ_VSI_TC_QUE_NUMBER_MASK (0x7 << \ - I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) - /* queueing option section */ - u8 queueing_opt_flags; -#define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA 0x04 -#define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA 0x08 -#define I40E_AQ_VSI_QUE_OPT_TCP_ENA 0x10 -#define I40E_AQ_VSI_QUE_OPT_FCOE_ENA 0x20 -#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00 -#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI 0x40 - u8 queueing_opt_reserved[3]; - /* scheduler section */ - u8 up_enable_bits; - u8 sched_reserved; - /* outer up section */ - __le32 outer_up_table; /* same structure and defines as ingress tbl */ - u8 cmd_reserved[8]; - /* last 32 bytes are written by FW */ - __le16 qs_handle[8]; -#define I40E_AQ_VSI_QS_HANDLE_INVALID 0xFFFF - __le16 stat_counter_idx; - __le16 sched_id; - u8 resp_reserved[12]; -}; - -I40E_CHECK_STRUCT_LEN(128, i40e_aqc_vsi_properties_data); - -/* Get VEB Parameters (direct 0x0232) - * uses i40e_aqc_switch_seid for the descriptor - */ -struct i40e_aqc_get_veb_parameters_completion { - __le16 seid; - __le16 switch_id; - __le16 veb_flags; /* only the first/last flags from 0x0230 is valid */ - __le16 statistic_index; - __le16 vebs_used; - __le16 vebs_free; - u8 reserved[4]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_get_veb_parameters_completion); - -#define I40E_LINK_SPEED_100MB_SHIFT 0x1 -#define I40E_LINK_SPEED_1000MB_SHIFT 0x2 -#define I40E_LINK_SPEED_10GB_SHIFT 0x3 -#define I40E_LINK_SPEED_40GB_SHIFT 0x4 -#define I40E_LINK_SPEED_20GB_SHIFT 0x5 -#define I40E_LINK_SPEED_25GB_SHIFT 0x6 - -enum i40e_aq_link_speed { - I40E_LINK_SPEED_UNKNOWN = 0, - I40E_LINK_SPEED_100MB = BIT(I40E_LINK_SPEED_100MB_SHIFT), - I40E_LINK_SPEED_1GB = BIT(I40E_LINK_SPEED_1000MB_SHIFT), - I40E_LINK_SPEED_10GB = BIT(I40E_LINK_SPEED_10GB_SHIFT), - I40E_LINK_SPEED_40GB = BIT(I40E_LINK_SPEED_40GB_SHIFT), - I40E_LINK_SPEED_20GB = BIT(I40E_LINK_SPEED_20GB_SHIFT), - I40E_LINK_SPEED_25GB = BIT(I40E_LINK_SPEED_25GB_SHIFT), -}; - -/* Send to PF command (indirect 0x0801) id is only used by PF - * Send to VF command (indirect 0x0802) id is only used by PF - * Send to Peer PF command (indirect 0x0803) - */ -struct i40e_aqc_pf_vf_message { - __le32 id; - u8 reserved[4]; - __le32 addr_high; - __le32 addr_low; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_pf_vf_message); - -struct i40e_aqc_get_set_rss_key { -#define I40E_AQC_SET_RSS_KEY_VSI_VALID BIT(15) -#define I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT 0 -#define I40E_AQC_SET_RSS_KEY_VSI_ID_MASK (0x3FF << \ - I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) - __le16 vsi_id; - u8 reserved[6]; - __le32 addr_high; - __le32 addr_low; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_key); - -struct i40e_aqc_get_set_rss_key_data { - u8 standard_rss_key[0x28]; - u8 extended_hash_key[0xc]; -}; - -I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data); - -struct i40e_aqc_get_set_rss_lut { 
-#define I40E_AQC_SET_RSS_LUT_VSI_VALID BIT(15) -#define I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT 0 -#define I40E_AQC_SET_RSS_LUT_VSI_ID_MASK (0x3FF << \ - I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) - __le16 vsi_id; -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT 0 -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \ - BIT(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) - -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI 0 -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1 - __le16 flags; - u8 reserved[4]; - __le32 addr_high; - __le32 addr_low; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut); -#endif /* _I40E_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 272d76b733aa..9fc635d816d2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -109,7 +109,7 @@ struct iavf_q_vector { /* Helper macros to switch between ints/sec and what the register uses. * And yes, it's the same math going both ways. The lowest value - * supported by all of the i40e hardware is 8. + * supported by all of the iavf hardware is 8. */ #define EITR_INTS_PER_SEC_TO_REG(_eitr) \ ((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8) @@ -171,6 +171,7 @@ enum iavf_state_t { __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ __IAVF_INIT_SW, /* got resources, setting up structs */ __IAVF_RESETTING, /* in reset */ + __IAVF_COMM_FAILED, /* communication with PF failed */ /* Below here, watchdog is running */ __IAVF_DOWN, /* ready, can be opened */ __IAVF_DOWN_PENDING, /* descending, waiting for watchdog */ @@ -216,7 +217,6 @@ struct iavf_cloud_filter { /* board specific private data structure */ struct iavf_adapter { - struct timer_list watchdog_timer; struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work client_task; @@ -244,7 +244,7 @@ struct iavf_adapter { int num_iwarp_msix; int iwarp_base_vector; u32 client_pending; - struct i40e_client_instance *cinst; + struct iavf_client_instance *cinst; struct msix_entry *msix_entries; u32 flags; @@ -303,7 +303,7 @@ struct iavf_adapter { enum iavf_state_t state; unsigned long crit_section; - struct work_struct watchdog_task; + struct delayed_work watchdog_task; bool netdev_registered; bool link_up; enum virtchnl_link_speed link_speed; @@ -351,7 +351,7 @@ struct iavf_adapter { /* Ethtool Private Flags */ /* lan device, used by client interface */ -struct i40e_device { +struct iavf_device { struct list_head list; struct iavf_adapter *vf; }; @@ -359,6 +359,7 @@ struct i40e_device { /* needed by iavf_ethtool.c */ extern char iavf_driver_name[]; extern const char iavf_driver_version[]; +extern struct workqueue_struct *iavf_wq; int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); @@ -402,7 +403,7 @@ void iavf_enable_vlan_stripping(struct iavf_adapter *adapter); void iavf_disable_vlan_stripping(struct iavf_adapter *adapter); void iavf_virtchnl_completion(struct iavf_adapter *adapter, enum virtchnl_ops v_opcode, - iavf_status v_retval, u8 *msg, u16 msglen); + enum iavf_status v_retval, u8 *msg, u16 msglen); int iavf_config_rss(struct iavf_adapter *adapter); int iavf_lan_add_device(struct iavf_adapter *adapter); int iavf_lan_del_device(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/i40e_adminq.c b/drivers/net/ethernet/intel/iavf/iavf_adminq.c index fca1ecfd9f71..9fa3fa99b4c2 100644 --- a/drivers/net/ethernet/intel/iavf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.c @@ -4,16 +4,16 @@ #include 
"iavf_status.h" #include "iavf_type.h" #include "iavf_register.h" -#include "i40e_adminq.h" +#include "iavf_adminq.h" #include "iavf_prototype.h" /** - * i40e_adminq_init_regs - Initialize AdminQ registers + * iavf_adminq_init_regs - Initialize AdminQ registers * @hw: pointer to the hardware structure * * This assumes the alloc_asq and alloc_arq functions have already been called **/ -static void i40e_adminq_init_regs(struct iavf_hw *hw) +static void iavf_adminq_init_regs(struct iavf_hw *hw) { /* set head and tail registers in our local struct */ hw->aq.asq.tail = IAVF_VF_ATQT1; @@ -29,24 +29,24 @@ static void i40e_adminq_init_regs(struct iavf_hw *hw) } /** - * i40e_alloc_adminq_asq_ring - Allocate Admin Queue send rings + * iavf_alloc_adminq_asq_ring - Allocate Admin Queue send rings * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_adminq_asq_ring(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_adminq_asq_ring(struct iavf_hw *hw) { - iavf_status ret_code; + enum iavf_status ret_code; ret_code = iavf_allocate_dma_mem(hw, &hw->aq.asq.desc_buf, - i40e_mem_atq_ring, + iavf_mem_atq_ring, (hw->aq.num_asq_entries * - sizeof(struct i40e_aq_desc)), + sizeof(struct iavf_aq_desc)), IAVF_ADMINQ_DESC_ALIGNMENT); if (ret_code) return ret_code; ret_code = iavf_allocate_virt_mem(hw, &hw->aq.asq.cmd_buf, (hw->aq.num_asq_entries * - sizeof(struct i40e_asq_cmd_details))); + sizeof(struct iavf_asq_cmd_details))); if (ret_code) { iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf); return ret_code; @@ -56,55 +56,55 @@ static iavf_status i40e_alloc_adminq_asq_ring(struct iavf_hw *hw) } /** - * i40e_alloc_adminq_arq_ring - Allocate Admin Queue receive rings + * iavf_alloc_adminq_arq_ring - Allocate Admin Queue receive rings * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_adminq_arq_ring(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_adminq_arq_ring(struct iavf_hw *hw) { - iavf_status ret_code; + enum iavf_status ret_code; ret_code = iavf_allocate_dma_mem(hw, &hw->aq.arq.desc_buf, - i40e_mem_arq_ring, + iavf_mem_arq_ring, (hw->aq.num_arq_entries * - sizeof(struct i40e_aq_desc)), + sizeof(struct iavf_aq_desc)), IAVF_ADMINQ_DESC_ALIGNMENT); return ret_code; } /** - * i40e_free_adminq_asq - Free Admin Queue send rings + * iavf_free_adminq_asq - Free Admin Queue send rings * @hw: pointer to the hardware structure * * This assumes the posted send buffers have already been cleaned * and de-allocated **/ -static void i40e_free_adminq_asq(struct iavf_hw *hw) +static void iavf_free_adminq_asq(struct iavf_hw *hw) { iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf); } /** - * i40e_free_adminq_arq - Free Admin Queue receive rings + * iavf_free_adminq_arq - Free Admin Queue receive rings * @hw: pointer to the hardware structure * * This assumes the posted receive buffers have already been cleaned * and de-allocated **/ -static void i40e_free_adminq_arq(struct iavf_hw *hw) +static void iavf_free_adminq_arq(struct iavf_hw *hw) { iavf_free_dma_mem(hw, &hw->aq.arq.desc_buf); } /** - * i40e_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue + * iavf_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_arq_bufs(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_arq_bufs(struct iavf_hw *hw) { - struct i40e_aq_desc *desc; + struct iavf_aq_desc *desc; struct iavf_dma_mem *bi; - iavf_status ret_code; + enum iavf_status ret_code; int i; /* We'll be allocating the buffer 
info memory first, then we can @@ -123,7 +123,7 @@ static iavf_status i40e_alloc_arq_bufs(struct iavf_hw *hw) for (i = 0; i < hw->aq.num_arq_entries; i++) { bi = &hw->aq.arq.r.arq_bi[i]; ret_code = iavf_allocate_dma_mem(hw, bi, - i40e_mem_arq_buf, + iavf_mem_arq_buf, hw->aq.arq_buf_size, IAVF_ADMINQ_DESC_ALIGNMENT); if (ret_code) @@ -132,9 +132,9 @@ static iavf_status i40e_alloc_arq_bufs(struct iavf_hw *hw) /* now configure the descriptors for use */ desc = IAVF_ADMINQ_DESC(hw->aq.arq, i); - desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF); - if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF) - desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB); + desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF); + if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF) + desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB); desc->opcode = 0; /* This is in accordance with Admin queue design, there is no * register for buffer size configuration @@ -165,13 +165,13 @@ unwind_alloc_arq_bufs: } /** - * i40e_alloc_asq_bufs - Allocate empty buffer structs for the send queue + * iavf_alloc_asq_bufs - Allocate empty buffer structs for the send queue * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_asq_bufs(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_asq_bufs(struct iavf_hw *hw) { struct iavf_dma_mem *bi; - iavf_status ret_code; + enum iavf_status ret_code; int i; /* No mapped memory needed yet, just the buffer info structures */ @@ -186,7 +186,7 @@ static iavf_status i40e_alloc_asq_bufs(struct iavf_hw *hw) for (i = 0; i < hw->aq.num_asq_entries; i++) { bi = &hw->aq.asq.r.asq_bi[i]; ret_code = iavf_allocate_dma_mem(hw, bi, - i40e_mem_asq_buf, + iavf_mem_asq_buf, hw->aq.asq_buf_size, IAVF_ADMINQ_DESC_ALIGNMENT); if (ret_code) @@ -206,10 +206,10 @@ unwind_alloc_asq_bufs: } /** - * i40e_free_arq_bufs - Free receive queue buffer info elements + * iavf_free_arq_bufs - Free receive queue buffer info elements * @hw: pointer to the hardware structure **/ -static void i40e_free_arq_bufs(struct iavf_hw *hw) +static void iavf_free_arq_bufs(struct iavf_hw *hw) { int i; @@ -225,10 +225,10 @@ static void i40e_free_arq_bufs(struct iavf_hw *hw) } /** - * i40e_free_asq_bufs - Free send queue buffer info elements + * iavf_free_asq_bufs - Free send queue buffer info elements * @hw: pointer to the hardware structure **/ -static void i40e_free_asq_bufs(struct iavf_hw *hw) +static void iavf_free_asq_bufs(struct iavf_hw *hw) { int i; @@ -248,14 +248,14 @@ static void i40e_free_asq_bufs(struct iavf_hw *hw) } /** - * i40e_config_asq_regs - configure ASQ registers + * iavf_config_asq_regs - configure ASQ registers * @hw: pointer to the hardware structure * * Configure base address and length registers for the transmit queue **/ -static iavf_status i40e_config_asq_regs(struct iavf_hw *hw) +static enum iavf_status iavf_config_asq_regs(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; u32 reg = 0; /* Clear Head and Tail */ @@ -271,20 +271,20 @@ static iavf_status i40e_config_asq_regs(struct iavf_hw *hw) /* Check one register to verify that config was applied */ reg = rd32(hw, hw->aq.asq.bal); if (reg != lower_32_bits(hw->aq.asq.desc_buf.pa)) - ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; + ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR; return ret_code; } /** - * i40e_config_arq_regs - ARQ register configuration + * iavf_config_arq_regs - ARQ register configuration * @hw: pointer to the hardware structure * * Configure base address and length registers for the receive (event queue) **/ -static iavf_status 
i40e_config_arq_regs(struct iavf_hw *hw) +static enum iavf_status iavf_config_arq_regs(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; u32 reg = 0; /* Clear Head and Tail */ @@ -303,13 +303,13 @@ static iavf_status i40e_config_arq_regs(struct iavf_hw *hw) /* Check one register to verify that config was applied */ reg = rd32(hw, hw->aq.arq.bal); if (reg != lower_32_bits(hw->aq.arq.desc_buf.pa)) - ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; + ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR; return ret_code; } /** - * i40e_init_asq - main initialization routine for ASQ + * iavf_init_asq - main initialization routine for ASQ * @hw: pointer to the hardware structure * * This is the main initialization routine for the Admin Send Queue @@ -321,20 +321,20 @@ static iavf_status i40e_config_arq_regs(struct iavf_hw *hw) * Do *NOT* hold the lock when calling this as the memory allocation routines * called are not going to be atomic context safe **/ -static iavf_status i40e_init_asq(struct iavf_hw *hw) +static enum iavf_status iavf_init_asq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; if (hw->aq.asq.count > 0) { /* queue already initialized */ - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto init_adminq_exit; } /* verify input for valid configuration */ if ((hw->aq.num_asq_entries == 0) || (hw->aq.asq_buf_size == 0)) { - ret_code = I40E_ERR_CONFIG; + ret_code = IAVF_ERR_CONFIG; goto init_adminq_exit; } @@ -342,17 +342,17 @@ static iavf_status i40e_init_asq(struct iavf_hw *hw) hw->aq.asq.next_to_clean = 0; /* allocate the ring memory */ - ret_code = i40e_alloc_adminq_asq_ring(hw); + ret_code = iavf_alloc_adminq_asq_ring(hw); if (ret_code) goto init_adminq_exit; /* allocate buffers in the rings */ - ret_code = i40e_alloc_asq_bufs(hw); + ret_code = iavf_alloc_asq_bufs(hw); if (ret_code) goto init_adminq_free_rings; /* initialize base registers */ - ret_code = i40e_config_asq_regs(hw); + ret_code = iavf_config_asq_regs(hw); if (ret_code) goto init_adminq_free_rings; @@ -361,14 +361,14 @@ static iavf_status i40e_init_asq(struct iavf_hw *hw) goto init_adminq_exit; init_adminq_free_rings: - i40e_free_adminq_asq(hw); + iavf_free_adminq_asq(hw); init_adminq_exit: return ret_code; } /** - * i40e_init_arq - initialize ARQ + * iavf_init_arq - initialize ARQ * @hw: pointer to the hardware structure * * The main initialization routine for the Admin Receive (Event) Queue. 
@@ -380,20 +380,20 @@ init_adminq_exit: * Do *NOT* hold the lock when calling this as the memory allocation routines * called are not going to be atomic context safe **/ -static iavf_status i40e_init_arq(struct iavf_hw *hw) +static enum iavf_status iavf_init_arq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; if (hw->aq.arq.count > 0) { /* queue already initialized */ - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto init_adminq_exit; } /* verify input for valid configuration */ if ((hw->aq.num_arq_entries == 0) || (hw->aq.arq_buf_size == 0)) { - ret_code = I40E_ERR_CONFIG; + ret_code = IAVF_ERR_CONFIG; goto init_adminq_exit; } @@ -401,17 +401,17 @@ static iavf_status i40e_init_arq(struct iavf_hw *hw) hw->aq.arq.next_to_clean = 0; /* allocate the ring memory */ - ret_code = i40e_alloc_adminq_arq_ring(hw); + ret_code = iavf_alloc_adminq_arq_ring(hw); if (ret_code) goto init_adminq_exit; /* allocate buffers in the rings */ - ret_code = i40e_alloc_arq_bufs(hw); + ret_code = iavf_alloc_arq_bufs(hw); if (ret_code) goto init_adminq_free_rings; /* initialize base registers */ - ret_code = i40e_config_arq_regs(hw); + ret_code = iavf_config_arq_regs(hw); if (ret_code) goto init_adminq_free_rings; @@ -420,26 +420,26 @@ static iavf_status i40e_init_arq(struct iavf_hw *hw) goto init_adminq_exit; init_adminq_free_rings: - i40e_free_adminq_arq(hw); + iavf_free_adminq_arq(hw); init_adminq_exit: return ret_code; } /** - * i40e_shutdown_asq - shutdown the ASQ + * iavf_shutdown_asq - shutdown the ASQ * @hw: pointer to the hardware structure * * The main shutdown routine for the Admin Send Queue **/ -static iavf_status i40e_shutdown_asq(struct iavf_hw *hw) +static enum iavf_status iavf_shutdown_asq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; mutex_lock(&hw->aq.asq_mutex); if (hw->aq.asq.count == 0) { - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto shutdown_asq_out; } @@ -453,7 +453,7 @@ static iavf_status i40e_shutdown_asq(struct iavf_hw *hw) hw->aq.asq.count = 0; /* to indicate uninitialized queue */ /* free ring buffers */ - i40e_free_asq_bufs(hw); + iavf_free_asq_bufs(hw); shutdown_asq_out: mutex_unlock(&hw->aq.asq_mutex); @@ -461,19 +461,19 @@ shutdown_asq_out: } /** - * i40e_shutdown_arq - shutdown ARQ + * iavf_shutdown_arq - shutdown ARQ * @hw: pointer to the hardware structure * * The main shutdown routine for the Admin Receive Queue **/ -static iavf_status i40e_shutdown_arq(struct iavf_hw *hw) +static enum iavf_status iavf_shutdown_arq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; mutex_lock(&hw->aq.arq_mutex); if (hw->aq.arq.count == 0) { - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto shutdown_arq_out; } @@ -487,7 +487,7 @@ static iavf_status i40e_shutdown_arq(struct iavf_hw *hw) hw->aq.arq.count = 0; /* to indicate uninitialized queue */ /* free ring buffers */ - i40e_free_arq_bufs(hw); + iavf_free_arq_bufs(hw); shutdown_arq_out: mutex_unlock(&hw->aq.arq_mutex); @@ -505,32 +505,32 @@ shutdown_arq_out: * - hw->aq.arq_buf_size * - hw->aq.asq_buf_size **/ -iavf_status iavf_init_adminq(struct iavf_hw *hw) +enum iavf_status iavf_init_adminq(struct iavf_hw *hw) { - iavf_status ret_code; + enum iavf_status ret_code; /* verify input for valid configuration */ if ((hw->aq.num_arq_entries == 0) || (hw->aq.num_asq_entries == 0) || (hw->aq.arq_buf_size == 0) || (hw->aq.asq_buf_size == 0)) { - ret_code = I40E_ERR_CONFIG; + ret_code = 
IAVF_ERR_CONFIG; goto init_adminq_exit; } /* Set up register offsets */ - i40e_adminq_init_regs(hw); + iavf_adminq_init_regs(hw); /* setup ASQ command write back timeout */ - hw->aq.asq_cmd_timeout = I40E_ASQ_CMD_TIMEOUT; + hw->aq.asq_cmd_timeout = IAVF_ASQ_CMD_TIMEOUT; /* allocate the ASQ */ - ret_code = i40e_init_asq(hw); + ret_code = iavf_init_asq(hw); if (ret_code) goto init_adminq_destroy_locks; /* allocate the ARQ */ - ret_code = i40e_init_arq(hw); + ret_code = iavf_init_arq(hw); if (ret_code) goto init_adminq_free_asq; @@ -538,7 +538,7 @@ iavf_status iavf_init_adminq(struct iavf_hw *hw) goto init_adminq_exit; init_adminq_free_asq: - i40e_shutdown_asq(hw); + iavf_shutdown_asq(hw); init_adminq_destroy_locks: init_adminq_exit: @@ -549,53 +549,53 @@ init_adminq_exit: * iavf_shutdown_adminq - shutdown routine for the Admin Queue * @hw: pointer to the hardware structure **/ -iavf_status iavf_shutdown_adminq(struct iavf_hw *hw) +enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; if (iavf_check_asq_alive(hw)) iavf_aq_queue_shutdown(hw, true); - i40e_shutdown_asq(hw); - i40e_shutdown_arq(hw); + iavf_shutdown_asq(hw); + iavf_shutdown_arq(hw); return ret_code; } /** - * i40e_clean_asq - cleans Admin send queue + * iavf_clean_asq - cleans Admin send queue * @hw: pointer to the hardware structure * * returns the number of free desc **/ -static u16 i40e_clean_asq(struct iavf_hw *hw) +static u16 iavf_clean_asq(struct iavf_hw *hw) { struct iavf_adminq_ring *asq = &hw->aq.asq; - struct i40e_asq_cmd_details *details; + struct iavf_asq_cmd_details *details; u16 ntc = asq->next_to_clean; - struct i40e_aq_desc desc_cb; - struct i40e_aq_desc *desc; + struct iavf_aq_desc desc_cb; + struct iavf_aq_desc *desc; desc = IAVF_ADMINQ_DESC(*asq, ntc); - details = I40E_ADMINQ_DETAILS(*asq, ntc); + details = IAVF_ADMINQ_DETAILS(*asq, ntc); while (rd32(hw, hw->aq.asq.head) != ntc) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head)); if (details->callback) { - I40E_ADMINQ_CALLBACK cb_func = - (I40E_ADMINQ_CALLBACK)details->callback; + IAVF_ADMINQ_CALLBACK cb_func = + (IAVF_ADMINQ_CALLBACK)details->callback; desc_cb = *desc; cb_func(hw, &desc_cb); } - memset((void *)desc, 0, sizeof(struct i40e_aq_desc)); + memset((void *)desc, 0, sizeof(struct iavf_aq_desc)); memset((void *)details, 0, - sizeof(struct i40e_asq_cmd_details)); + sizeof(struct iavf_asq_cmd_details)); ntc++; if (ntc == asq->count) ntc = 0; desc = IAVF_ADMINQ_DESC(*asq, ntc); - details = I40E_ADMINQ_DETAILS(*asq, ntc); + details = IAVF_ADMINQ_DETAILS(*asq, ntc); } asq->next_to_clean = ntc; @@ -629,16 +629,17 @@ bool iavf_asq_done(struct iavf_hw *hw) * This is the main send command driver routine for the Admin Queue send * queue. 
It runs the queue, cleans the queue, etc **/ -iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ - u16 buff_size, - struct i40e_asq_cmd_details *cmd_details) +enum iavf_status iavf_asq_send_command(struct iavf_hw *hw, + struct iavf_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct iavf_asq_cmd_details *cmd_details) { struct iavf_dma_mem *dma_buff = NULL; - struct i40e_asq_cmd_details *details; - struct i40e_aq_desc *desc_on_ring; + struct iavf_asq_cmd_details *details; + struct iavf_aq_desc *desc_on_ring; bool cmd_completed = false; - iavf_status status = 0; + enum iavf_status status = 0; u16 retval = 0; u32 val = 0; @@ -647,21 +648,21 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, if (hw->aq.asq.count == 0) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Admin queue not initialized.\n"); - status = I40E_ERR_QUEUE_EMPTY; + status = IAVF_ERR_QUEUE_EMPTY; goto asq_send_command_error; } - hw->aq.asq_last_status = I40E_AQ_RC_OK; + hw->aq.asq_last_status = IAVF_AQ_RC_OK; val = rd32(hw, hw->aq.asq.head); if (val >= hw->aq.num_asq_entries) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: head overrun at %d\n", val); - status = I40E_ERR_QUEUE_EMPTY; + status = IAVF_ERR_QUEUE_EMPTY; goto asq_send_command_error; } - details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use); + details = IAVF_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use); if (cmd_details) { *details = *cmd_details; @@ -676,7 +677,7 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, cpu_to_le32(lower_32_bits(details->cookie)); } } else { - memset(details, 0, sizeof(struct i40e_asq_cmd_details)); + memset(details, 0, sizeof(struct iavf_asq_cmd_details)); } /* clear requested flags and then set additional flags if defined */ @@ -688,7 +689,7 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Invalid buffer size: %d.\n", buff_size); - status = I40E_ERR_INVALID_SIZE; + status = IAVF_ERR_INVALID_SIZE; goto asq_send_command_error; } @@ -696,7 +697,7 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Async flag not set along with postpone flag"); - status = I40E_ERR_PARAM; + status = IAVF_ERR_PARAM; goto asq_send_command_error; } @@ -707,11 +708,11 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, /* the clean function called here could be called in a separate thread * in case of asynchronous completions */ - if (i40e_clean_asq(hw) == 0) { + if (iavf_clean_asq(hw) == 0) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Error queue is full.\n"); - status = I40E_ERR_ADMIN_QUEUE_FULL; + status = IAVF_ERR_ADMIN_QUEUE_FULL; goto asq_send_command_error; } @@ -780,13 +781,13 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, retval &= 0xff; } cmd_completed = true; - if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_OK) + if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_OK) status = 0; - else if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_EBUSY) - status = I40E_ERR_NOT_READY; + else if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_EBUSY) + status = IAVF_ERR_NOT_READY; else - status = I40E_ERR_ADMIN_QUEUE_ERROR; - hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval; + status = IAVF_ERR_ADMIN_QUEUE_ERROR; + hw->aq.asq_last_status = (enum iavf_admin_queue_err)retval; } 
iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, @@ -803,11 +804,11 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, if (rd32(hw, hw->aq.asq.len) & IAVF_VF_ATQLEN1_ATQCRIT_MASK) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: AQ Critical error.\n"); - status = I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR; + status = IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR; } else { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Writeback timeout.\n"); - status = I40E_ERR_ADMIN_QUEUE_TIMEOUT; + status = IAVF_ERR_ADMIN_QUEUE_TIMEOUT; } } @@ -823,12 +824,12 @@ asq_send_command_error: * * Fill the desc with default values **/ -void iavf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc, u16 opcode) +void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode) { /* zero out the desc */ - memset((void *)desc, 0, sizeof(struct i40e_aq_desc)); + memset((void *)desc, 0, sizeof(struct iavf_aq_desc)); desc->opcode = cpu_to_le16(opcode); - desc->flags = cpu_to_le16(I40E_AQ_FLAG_SI); + desc->flags = cpu_to_le16(IAVF_AQ_FLAG_SI); } /** @@ -841,13 +842,13 @@ void iavf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc, u16 opcode) * the contents through e. It can also return how many events are * left to process through 'pending' **/ -iavf_status iavf_clean_arq_element(struct iavf_hw *hw, - struct i40e_arq_event_info *e, - u16 *pending) +enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw, + struct iavf_arq_event_info *e, + u16 *pending) { u16 ntc = hw->aq.arq.next_to_clean; - struct i40e_aq_desc *desc; - iavf_status ret_code = 0; + struct iavf_aq_desc *desc; + enum iavf_status ret_code = 0; struct iavf_dma_mem *bi; u16 desc_idx; u16 datalen; @@ -863,7 +864,7 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, if (hw->aq.arq.count == 0) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQRX: Admin queue not initialized.\n"); - ret_code = I40E_ERR_QUEUE_EMPTY; + ret_code = IAVF_ERR_QUEUE_EMPTY; goto clean_arq_element_err; } @@ -871,7 +872,7 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, ntu = rd32(hw, hw->aq.arq.head) & IAVF_VF_ARQH1_ARQH_MASK; if (ntu == ntc) { /* nothing to do - shouldn't need to update ring's values */ - ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK; + ret_code = IAVF_ERR_ADMIN_QUEUE_NO_WORK; goto clean_arq_element_out; } @@ -880,10 +881,10 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, desc_idx = ntc; hw->aq.arq_last_status = - (enum i40e_admin_queue_err)le16_to_cpu(desc->retval); + (enum iavf_admin_queue_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); - if (flags & I40E_AQ_FLAG_ERR) { - ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; + if (flags & IAVF_AQ_FLAG_ERR) { + ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR; iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQRX: Event received with error 0x%X.\n", @@ -906,11 +907,11 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, * size */ bi = &hw->aq.arq.r.arq_bi[ntc]; - memset((void *)desc, 0, sizeof(struct i40e_aq_desc)); + memset((void *)desc, 0, sizeof(struct iavf_aq_desc)); - desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF); - if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF) - desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB); + desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF); + if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF) + desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB); desc->datalen = cpu_to_le16((u16)bi->size); desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa)); desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa)); diff --git a/drivers/net/ethernet/intel/iavf/i40e_adminq.h 
b/drivers/net/ethernet/intel/iavf/iavf_adminq.h index ee983889eab0..baf2fe26f302 100644 --- a/drivers/net/ethernet/intel/iavf/i40e_adminq.h +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.h @@ -6,10 +6,10 @@ #include "iavf_osdep.h" #include "iavf_status.h" -#include "i40e_adminq_cmd.h" +#include "iavf_adminq_cmd.h" #define IAVF_ADMINQ_DESC(R, i) \ - (&(((struct i40e_aq_desc *)((R).desc_buf.va))[i])) + (&(((struct iavf_aq_desc *)((R).desc_buf.va))[i])) #define IAVF_ADMINQ_DESC_ALIGNMENT 4096 @@ -39,22 +39,22 @@ struct iavf_adminq_ring { }; /* ASQ transaction details */ -struct i40e_asq_cmd_details { - void *callback; /* cast from type I40E_ADMINQ_CALLBACK */ +struct iavf_asq_cmd_details { + void *callback; /* cast from type IAVF_ADMINQ_CALLBACK */ u64 cookie; u16 flags_ena; u16 flags_dis; bool async; bool postpone; - struct i40e_aq_desc *wb_desc; + struct iavf_aq_desc *wb_desc; }; -#define I40E_ADMINQ_DETAILS(R, i) \ - (&(((struct i40e_asq_cmd_details *)((R).cmd_buf.va))[i])) +#define IAVF_ADMINQ_DETAILS(R, i) \ + (&(((struct iavf_asq_cmd_details *)((R).cmd_buf.va))[i])) /* ARQ event information */ -struct i40e_arq_event_info { - struct i40e_aq_desc desc; +struct iavf_arq_event_info { + struct iavf_aq_desc desc; u16 msg_len; u16 buf_len; u8 *msg_buf; @@ -79,45 +79,45 @@ struct iavf_adminq_info { struct mutex arq_mutex; /* Receive queue lock */ /* last status values on send and receive queues */ - enum i40e_admin_queue_err asq_last_status; - enum i40e_admin_queue_err arq_last_status; + enum iavf_admin_queue_err asq_last_status; + enum iavf_admin_queue_err arq_last_status; }; /** - * i40e_aq_rc_to_posix - convert errors to user-land codes + * iavf_aq_rc_to_posix - convert errors to user-land codes * aq_ret: AdminQ handler error code can override aq_rc * aq_rc: AdminQ firmware error code to convert **/ -static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc) +static inline int iavf_aq_rc_to_posix(int aq_ret, int aq_rc) { int aq_to_posix[] = { - 0, /* I40E_AQ_RC_OK */ - -EPERM, /* I40E_AQ_RC_EPERM */ - -ENOENT, /* I40E_AQ_RC_ENOENT */ - -ESRCH, /* I40E_AQ_RC_ESRCH */ - -EINTR, /* I40E_AQ_RC_EINTR */ - -EIO, /* I40E_AQ_RC_EIO */ - -ENXIO, /* I40E_AQ_RC_ENXIO */ - -E2BIG, /* I40E_AQ_RC_E2BIG */ - -EAGAIN, /* I40E_AQ_RC_EAGAIN */ - -ENOMEM, /* I40E_AQ_RC_ENOMEM */ - -EACCES, /* I40E_AQ_RC_EACCES */ - -EFAULT, /* I40E_AQ_RC_EFAULT */ - -EBUSY, /* I40E_AQ_RC_EBUSY */ - -EEXIST, /* I40E_AQ_RC_EEXIST */ - -EINVAL, /* I40E_AQ_RC_EINVAL */ - -ENOTTY, /* I40E_AQ_RC_ENOTTY */ - -ENOSPC, /* I40E_AQ_RC_ENOSPC */ - -ENOSYS, /* I40E_AQ_RC_ENOSYS */ - -ERANGE, /* I40E_AQ_RC_ERANGE */ - -EPIPE, /* I40E_AQ_RC_EFLUSHED */ - -ESPIPE, /* I40E_AQ_RC_BAD_ADDR */ - -EROFS, /* I40E_AQ_RC_EMODE */ - -EFBIG, /* I40E_AQ_RC_EFBIG */ + 0, /* IAVF_AQ_RC_OK */ + -EPERM, /* IAVF_AQ_RC_EPERM */ + -ENOENT, /* IAVF_AQ_RC_ENOENT */ + -ESRCH, /* IAVF_AQ_RC_ESRCH */ + -EINTR, /* IAVF_AQ_RC_EINTR */ + -EIO, /* IAVF_AQ_RC_EIO */ + -ENXIO, /* IAVF_AQ_RC_ENXIO */ + -E2BIG, /* IAVF_AQ_RC_E2BIG */ + -EAGAIN, /* IAVF_AQ_RC_EAGAIN */ + -ENOMEM, /* IAVF_AQ_RC_ENOMEM */ + -EACCES, /* IAVF_AQ_RC_EACCES */ + -EFAULT, /* IAVF_AQ_RC_EFAULT */ + -EBUSY, /* IAVF_AQ_RC_EBUSY */ + -EEXIST, /* IAVF_AQ_RC_EEXIST */ + -EINVAL, /* IAVF_AQ_RC_EINVAL */ + -ENOTTY, /* IAVF_AQ_RC_ENOTTY */ + -ENOSPC, /* IAVF_AQ_RC_ENOSPC */ + -ENOSYS, /* IAVF_AQ_RC_ENOSYS */ + -ERANGE, /* IAVF_AQ_RC_ERANGE */ + -EPIPE, /* IAVF_AQ_RC_EFLUSHED */ + -ESPIPE, /* IAVF_AQ_RC_BAD_ADDR */ + -EROFS, /* IAVF_AQ_RC_EMODE */ + -EFBIG, /* IAVF_AQ_RC_EFBIG */ }; /* aq_rc is invalid 
if AQ timed out */ - if (aq_ret == I40E_ERR_ADMIN_QUEUE_TIMEOUT) + if (aq_ret == IAVF_ERR_ADMIN_QUEUE_TIMEOUT) return -EAGAIN; if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0])))) @@ -127,9 +127,9 @@ static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc) } /* general information */ -#define I40E_AQ_LARGE_BUF 512 -#define I40E_ASQ_CMD_TIMEOUT 250000 /* usecs */ +#define IAVF_AQ_LARGE_BUF 512 +#define IAVF_ASQ_CMD_TIMEOUT 250000 /* usecs */ -void iavf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc, u16 opcode); +void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode); #endif /* _IAVF_ADMINQ_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h new file mode 100644 index 000000000000..bc512308557b --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h @@ -0,0 +1,528 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ + +#ifndef _IAVF_ADMINQ_CMD_H_ +#define _IAVF_ADMINQ_CMD_H_ + +/* This header file defines the iavf Admin Queue commands and is shared between + * iavf Firmware and Software. + * + * This file needs to comply with the Linux Kernel coding style. + */ + +#define IAVF_FW_API_VERSION_MAJOR 0x0001 +#define IAVF_FW_API_VERSION_MINOR_X722 0x0005 +#define IAVF_FW_API_VERSION_MINOR_X710 0x0008 + +#define IAVF_FW_MINOR_VERSION(_h) ((_h)->mac.type == IAVF_MAC_XL710 ? \ + IAVF_FW_API_VERSION_MINOR_X710 : \ + IAVF_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define IAVF_MINOR_VER_GET_LINK_INFO_XL710 0x0007 + +struct iavf_aq_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 retval; + __le32 cookie_high; + __le32 cookie_low; + union { + struct { + __le32 param0; + __le32 param1; + __le32 param2; + __le32 param3; + } internal; + struct { + __le32 param0; + __le32 param1; + __le32 addr_high; + __le32 addr_low; + } external; + u8 raw[16]; + } params; +}; + +/* Flags sub-structure + * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | + * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE | + */ + +/* command flags and offsets*/ +#define IAVF_AQ_FLAG_DD_SHIFT 0 +#define IAVF_AQ_FLAG_CMP_SHIFT 1 +#define IAVF_AQ_FLAG_ERR_SHIFT 2 +#define IAVF_AQ_FLAG_VFE_SHIFT 3 +#define IAVF_AQ_FLAG_LB_SHIFT 9 +#define IAVF_AQ_FLAG_RD_SHIFT 10 +#define IAVF_AQ_FLAG_VFC_SHIFT 11 +#define IAVF_AQ_FLAG_BUF_SHIFT 12 +#define IAVF_AQ_FLAG_SI_SHIFT 13 +#define IAVF_AQ_FLAG_EI_SHIFT 14 +#define IAVF_AQ_FLAG_FE_SHIFT 15 + +#define IAVF_AQ_FLAG_DD BIT(IAVF_AQ_FLAG_DD_SHIFT) /* 0x1 */ +#define IAVF_AQ_FLAG_CMP BIT(IAVF_AQ_FLAG_CMP_SHIFT) /* 0x2 */ +#define IAVF_AQ_FLAG_ERR BIT(IAVF_AQ_FLAG_ERR_SHIFT) /* 0x4 */ +#define IAVF_AQ_FLAG_VFE BIT(IAVF_AQ_FLAG_VFE_SHIFT) /* 0x8 */ +#define IAVF_AQ_FLAG_LB BIT(IAVF_AQ_FLAG_LB_SHIFT) /* 0x200 */ +#define IAVF_AQ_FLAG_RD BIT(IAVF_AQ_FLAG_RD_SHIFT) /* 0x400 */ +#define IAVF_AQ_FLAG_VFC BIT(IAVF_AQ_FLAG_VFC_SHIFT) /* 0x800 */ +#define IAVF_AQ_FLAG_BUF BIT(IAVF_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ +#define IAVF_AQ_FLAG_SI BIT(IAVF_AQ_FLAG_SI_SHIFT) /* 0x2000 */ +#define IAVF_AQ_FLAG_EI BIT(IAVF_AQ_FLAG_EI_SHIFT) /* 0x4000 */ +#define IAVF_AQ_FLAG_FE BIT(IAVF_AQ_FLAG_FE_SHIFT) /* 0x8000 */ + +/* error codes */ +enum iavf_admin_queue_err { + IAVF_AQ_RC_OK = 0, /* success */ + IAVF_AQ_RC_EPERM = 1, /* Operation not permitted */ + IAVF_AQ_RC_ENOENT = 2, /* No such element */ + IAVF_AQ_RC_ESRCH = 3, /* Bad opcode */ + IAVF_AQ_RC_EINTR 
= 4, /* operation interrupted */ + IAVF_AQ_RC_EIO = 5, /* I/O error */ + IAVF_AQ_RC_ENXIO = 6, /* No such resource */ + IAVF_AQ_RC_E2BIG = 7, /* Arg too long */ + IAVF_AQ_RC_EAGAIN = 8, /* Try again */ + IAVF_AQ_RC_ENOMEM = 9, /* Out of memory */ + IAVF_AQ_RC_EACCES = 10, /* Permission denied */ + IAVF_AQ_RC_EFAULT = 11, /* Bad address */ + IAVF_AQ_RC_EBUSY = 12, /* Device or resource busy */ + IAVF_AQ_RC_EEXIST = 13, /* object already exists */ + IAVF_AQ_RC_EINVAL = 14, /* Invalid argument */ + IAVF_AQ_RC_ENOTTY = 15, /* Not a typewriter */ + IAVF_AQ_RC_ENOSPC = 16, /* No space left or alloc failure */ + IAVF_AQ_RC_ENOSYS = 17, /* Function not implemented */ + IAVF_AQ_RC_ERANGE = 18, /* Parameter out of range */ + IAVF_AQ_RC_EFLUSHED = 19, /* Cmd flushed due to prev cmd error */ + IAVF_AQ_RC_BAD_ADDR = 20, /* Descriptor contains a bad pointer */ + IAVF_AQ_RC_EMODE = 21, /* Op not allowed in current dev mode */ + IAVF_AQ_RC_EFBIG = 22, /* File too large */ +}; + +/* Admin Queue command opcodes */ +enum iavf_admin_queue_opc { + /* aq commands */ + iavf_aqc_opc_get_version = 0x0001, + iavf_aqc_opc_driver_version = 0x0002, + iavf_aqc_opc_queue_shutdown = 0x0003, + iavf_aqc_opc_set_pf_context = 0x0004, + + /* resource ownership */ + iavf_aqc_opc_request_resource = 0x0008, + iavf_aqc_opc_release_resource = 0x0009, + + iavf_aqc_opc_list_func_capabilities = 0x000A, + iavf_aqc_opc_list_dev_capabilities = 0x000B, + + /* Proxy commands */ + iavf_aqc_opc_set_proxy_config = 0x0104, + iavf_aqc_opc_set_ns_proxy_table_entry = 0x0105, + + /* LAA */ + iavf_aqc_opc_mac_address_read = 0x0107, + iavf_aqc_opc_mac_address_write = 0x0108, + + /* PXE */ + iavf_aqc_opc_clear_pxe_mode = 0x0110, + + /* WoL commands */ + iavf_aqc_opc_set_wol_filter = 0x0120, + iavf_aqc_opc_get_wake_reason = 0x0121, + + /* internal switch commands */ + iavf_aqc_opc_get_switch_config = 0x0200, + iavf_aqc_opc_add_statistics = 0x0201, + iavf_aqc_opc_remove_statistics = 0x0202, + iavf_aqc_opc_set_port_parameters = 0x0203, + iavf_aqc_opc_get_switch_resource_alloc = 0x0204, + iavf_aqc_opc_set_switch_config = 0x0205, + iavf_aqc_opc_rx_ctl_reg_read = 0x0206, + iavf_aqc_opc_rx_ctl_reg_write = 0x0207, + + iavf_aqc_opc_add_vsi = 0x0210, + iavf_aqc_opc_update_vsi_parameters = 0x0211, + iavf_aqc_opc_get_vsi_parameters = 0x0212, + + iavf_aqc_opc_add_pv = 0x0220, + iavf_aqc_opc_update_pv_parameters = 0x0221, + iavf_aqc_opc_get_pv_parameters = 0x0222, + + iavf_aqc_opc_add_veb = 0x0230, + iavf_aqc_opc_update_veb_parameters = 0x0231, + iavf_aqc_opc_get_veb_parameters = 0x0232, + + iavf_aqc_opc_delete_element = 0x0243, + + iavf_aqc_opc_add_macvlan = 0x0250, + iavf_aqc_opc_remove_macvlan = 0x0251, + iavf_aqc_opc_add_vlan = 0x0252, + iavf_aqc_opc_remove_vlan = 0x0253, + iavf_aqc_opc_set_vsi_promiscuous_modes = 0x0254, + iavf_aqc_opc_add_tag = 0x0255, + iavf_aqc_opc_remove_tag = 0x0256, + iavf_aqc_opc_add_multicast_etag = 0x0257, + iavf_aqc_opc_remove_multicast_etag = 0x0258, + iavf_aqc_opc_update_tag = 0x0259, + iavf_aqc_opc_add_control_packet_filter = 0x025A, + iavf_aqc_opc_remove_control_packet_filter = 0x025B, + iavf_aqc_opc_add_cloud_filters = 0x025C, + iavf_aqc_opc_remove_cloud_filters = 0x025D, + iavf_aqc_opc_clear_wol_switch_filters = 0x025E, + + iavf_aqc_opc_add_mirror_rule = 0x0260, + iavf_aqc_opc_delete_mirror_rule = 0x0261, + + /* Dynamic Device Personalization */ + iavf_aqc_opc_write_personalization_profile = 0x0270, + iavf_aqc_opc_get_personalization_profile_list = 0x0271, + + /* DCB commands */ + iavf_aqc_opc_dcb_ignore_pfc = 0x0301, + 
iavf_aqc_opc_dcb_updated = 0x0302, + iavf_aqc_opc_set_dcb_parameters = 0x0303, + + /* TX scheduler */ + iavf_aqc_opc_configure_vsi_bw_limit = 0x0400, + iavf_aqc_opc_configure_vsi_ets_sla_bw_limit = 0x0406, + iavf_aqc_opc_configure_vsi_tc_bw = 0x0407, + iavf_aqc_opc_query_vsi_bw_config = 0x0408, + iavf_aqc_opc_query_vsi_ets_sla_config = 0x040A, + iavf_aqc_opc_configure_switching_comp_bw_limit = 0x0410, + + iavf_aqc_opc_enable_switching_comp_ets = 0x0413, + iavf_aqc_opc_modify_switching_comp_ets = 0x0414, + iavf_aqc_opc_disable_switching_comp_ets = 0x0415, + iavf_aqc_opc_configure_switching_comp_ets_bw_limit = 0x0416, + iavf_aqc_opc_configure_switching_comp_bw_config = 0x0417, + iavf_aqc_opc_query_switching_comp_ets_config = 0x0418, + iavf_aqc_opc_query_port_ets_config = 0x0419, + iavf_aqc_opc_query_switching_comp_bw_config = 0x041A, + iavf_aqc_opc_suspend_port_tx = 0x041B, + iavf_aqc_opc_resume_port_tx = 0x041C, + iavf_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + iavf_aqc_opc_query_hmc_resource_profile = 0x0500, + iavf_aqc_opc_set_hmc_resource_profile = 0x0501, + + /* phy commands*/ + iavf_aqc_opc_get_phy_abilities = 0x0600, + iavf_aqc_opc_set_phy_config = 0x0601, + iavf_aqc_opc_set_mac_config = 0x0603, + iavf_aqc_opc_set_link_restart_an = 0x0605, + iavf_aqc_opc_get_link_status = 0x0607, + iavf_aqc_opc_set_phy_int_mask = 0x0613, + iavf_aqc_opc_get_local_advt_reg = 0x0614, + iavf_aqc_opc_set_local_advt_reg = 0x0615, + iavf_aqc_opc_get_partner_advt = 0x0616, + iavf_aqc_opc_set_lb_modes = 0x0618, + iavf_aqc_opc_get_phy_wol_caps = 0x0621, + iavf_aqc_opc_set_phy_debug = 0x0622, + iavf_aqc_opc_upload_ext_phy_fm = 0x0625, + iavf_aqc_opc_run_phy_activity = 0x0626, + iavf_aqc_opc_set_phy_register = 0x0628, + iavf_aqc_opc_get_phy_register = 0x0629, + + /* NVM commands */ + iavf_aqc_opc_nvm_read = 0x0701, + iavf_aqc_opc_nvm_erase = 0x0702, + iavf_aqc_opc_nvm_update = 0x0703, + iavf_aqc_opc_nvm_config_read = 0x0704, + iavf_aqc_opc_nvm_config_write = 0x0705, + iavf_aqc_opc_oem_post_update = 0x0720, + iavf_aqc_opc_thermal_sensor = 0x0721, + + /* virtualization commands */ + iavf_aqc_opc_send_msg_to_pf = 0x0801, + iavf_aqc_opc_send_msg_to_vf = 0x0802, + iavf_aqc_opc_send_msg_to_peer = 0x0803, + + /* alternate structure */ + iavf_aqc_opc_alternate_write = 0x0900, + iavf_aqc_opc_alternate_write_indirect = 0x0901, + iavf_aqc_opc_alternate_read = 0x0902, + iavf_aqc_opc_alternate_read_indirect = 0x0903, + iavf_aqc_opc_alternate_write_done = 0x0904, + iavf_aqc_opc_alternate_set_mode = 0x0905, + iavf_aqc_opc_alternate_clear_port = 0x0906, + + /* LLDP commands */ + iavf_aqc_opc_lldp_get_mib = 0x0A00, + iavf_aqc_opc_lldp_update_mib = 0x0A01, + iavf_aqc_opc_lldp_add_tlv = 0x0A02, + iavf_aqc_opc_lldp_update_tlv = 0x0A03, + iavf_aqc_opc_lldp_delete_tlv = 0x0A04, + iavf_aqc_opc_lldp_stop = 0x0A05, + iavf_aqc_opc_lldp_start = 0x0A06, + + /* Tunnel commands */ + iavf_aqc_opc_add_udp_tunnel = 0x0B00, + iavf_aqc_opc_del_udp_tunnel = 0x0B01, + iavf_aqc_opc_set_rss_key = 0x0B02, + iavf_aqc_opc_set_rss_lut = 0x0B03, + iavf_aqc_opc_get_rss_key = 0x0B04, + iavf_aqc_opc_get_rss_lut = 0x0B05, + + /* Async Events */ + iavf_aqc_opc_event_lan_overflow = 0x1001, + + /* OEM commands */ + iavf_aqc_opc_oem_parameter_change = 0xFE00, + iavf_aqc_opc_oem_device_status_change = 0xFE01, + iavf_aqc_opc_oem_ocsd_initialize = 0xFE02, + iavf_aqc_opc_oem_ocbb_initialize = 0xFE03, + + /* debug commands */ + iavf_aqc_opc_debug_read_reg = 0xFF03, + iavf_aqc_opc_debug_write_reg = 0xFF04, + iavf_aqc_opc_debug_modify_reg = 0xFF07, + 
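/*
 * How these opcodes are consumed (condensed from iavf_aq_queue_shutdown()
 * later in this patch, not new driver code): a direct command zeroes an
 * iavf_aq_desc, stamps the opcode, and overlays its 16-byte command
 * structure on params.raw:
 *
 *	struct iavf_aq_desc desc;
 *	struct iavf_aqc_queue_shutdown *cmd =
 *		(struct iavf_aqc_queue_shutdown *)&desc.params.raw;
 *	enum iavf_status status;
 *
 *	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_queue_shutdown);
 *	cmd->driver_unloading = cpu_to_le32(IAVF_AQ_DRIVER_UNLOADING);
 *	status = iavf_asq_send_command(hw, &desc, NULL, 0, NULL);
 */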
iavf_aqc_opc_debug_dump_internals = 0xFF08, +}; + +/* command structures and indirect data structures */ + +/* Structure naming conventions: + * - no suffix for direct command descriptor structures + * - _data for indirect sent data + * - _resp for indirect return data (data which is both will use _data) + * - _completion for direct return data + * - _element_ for repeated elements (may also be _data or _resp) + * + * Command structures are expected to overlay the params.raw member of the basic + * descriptor, and as such cannot exceed 16 bytes in length. + */ + +/* This macro is used to generate a compilation error if a structure + * is not exactly the correct length. It gives a divide by zero error if the + * structure is not of the correct size, otherwise it creates an enum that is + * never used. + */ +#define IAVF_CHECK_STRUCT_LEN(n, X) enum iavf_static_assert_enum_##X \ + { iavf_static_assert_##X = (n) / ((sizeof(struct X) == (n)) ? 1 : 0) } + +/* This macro is used extensively to ensure that command structures are 16 + * bytes in length as they have to map to the raw array of that size. + */ +#define IAVF_CHECK_CMD_LENGTH(X) IAVF_CHECK_STRUCT_LEN(16, X) + +/* Queue Shutdown (direct 0x0003) */ +struct iavf_aqc_queue_shutdown { + __le32 driver_unloading; +#define IAVF_AQ_DRIVER_UNLOADING 0x1 + u8 reserved[12]; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_queue_shutdown); + +struct iavf_aqc_vsi_properties_data { + /* first 96 byte are written by SW */ + __le16 valid_sections; +#define IAVF_AQ_VSI_PROP_SWITCH_VALID 0x0001 +#define IAVF_AQ_VSI_PROP_SECURITY_VALID 0x0002 +#define IAVF_AQ_VSI_PROP_VLAN_VALID 0x0004 +#define IAVF_AQ_VSI_PROP_CAS_PV_VALID 0x0008 +#define IAVF_AQ_VSI_PROP_INGRESS_UP_VALID 0x0010 +#define IAVF_AQ_VSI_PROP_EGRESS_UP_VALID 0x0020 +#define IAVF_AQ_VSI_PROP_QUEUE_MAP_VALID 0x0040 +#define IAVF_AQ_VSI_PROP_QUEUE_OPT_VALID 0x0080 +#define IAVF_AQ_VSI_PROP_OUTER_UP_VALID 0x0100 +#define IAVF_AQ_VSI_PROP_SCHED_VALID 0x0200 + /* switch section */ + __le16 switch_id; /* 12bit id combined with flags below */ +#define IAVF_AQ_VSI_SW_ID_SHIFT 0x0000 +#define IAVF_AQ_VSI_SW_ID_MASK (0xFFF << IAVF_AQ_VSI_SW_ID_SHIFT) +#define IAVF_AQ_VSI_SW_ID_FLAG_NOT_STAG 0x1000 +#define IAVF_AQ_VSI_SW_ID_FLAG_ALLOW_LB 0x2000 +#define IAVF_AQ_VSI_SW_ID_FLAG_LOCAL_LB 0x4000 + u8 sw_reserved[2]; + /* security section */ + u8 sec_flags; +#define IAVF_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD 0x01 +#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK 0x02 +#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK 0x04 + u8 sec_reserved; + /* VLAN section */ + __le16 pvid; /* VLANS include priority bits */ + __le16 fcoe_pvid; + u8 port_vlan_flags; +#define IAVF_AQ_VSI_PVLAN_MODE_SHIFT 0x00 +#define IAVF_AQ_VSI_PVLAN_MODE_MASK (0x03 << \ + IAVF_AQ_VSI_PVLAN_MODE_SHIFT) +#define IAVF_AQ_VSI_PVLAN_MODE_TAGGED 0x01 +#define IAVF_AQ_VSI_PVLAN_MODE_UNTAGGED 0x02 +#define IAVF_AQ_VSI_PVLAN_MODE_ALL 0x03 +#define IAVF_AQ_VSI_PVLAN_INSERT_PVID 0x04 +#define IAVF_AQ_VSI_PVLAN_EMOD_SHIFT 0x03 +#define IAVF_AQ_VSI_PVLAN_EMOD_MASK (0x3 << \ + IAVF_AQ_VSI_PVLAN_EMOD_SHIFT) +#define IAVF_AQ_VSI_PVLAN_EMOD_STR_BOTH 0x0 +#define IAVF_AQ_VSI_PVLAN_EMOD_STR_UP 0x08 +#define IAVF_AQ_VSI_PVLAN_EMOD_STR 0x10 +#define IAVF_AQ_VSI_PVLAN_EMOD_NOTHING 0x18 + u8 pvlan_reserved[3]; + /* ingress egress up sections */ + __le32 ingress_table; /* bitmap, 3 bits per up */ +#define IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT 0 +#define IAVF_AQ_VSI_UP_TABLE_UP0_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT 3 +#define 
IAVF_AQ_VSI_UP_TABLE_UP1_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT 6 +#define IAVF_AQ_VSI_UP_TABLE_UP2_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT 9 +#define IAVF_AQ_VSI_UP_TABLE_UP3_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT 12 +#define IAVF_AQ_VSI_UP_TABLE_UP4_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT 15 +#define IAVF_AQ_VSI_UP_TABLE_UP5_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT 18 +#define IAVF_AQ_VSI_UP_TABLE_UP6_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT 21 +#define IAVF_AQ_VSI_UP_TABLE_UP7_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT) + __le32 egress_table; /* same defines as for ingress table */ + /* cascaded PV section */ + __le16 cas_pv_tag; + u8 cas_pv_flags; +#define IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT 0x00 +#define IAVF_AQ_VSI_CAS_PV_TAGX_MASK (0x03 << \ + IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT) +#define IAVF_AQ_VSI_CAS_PV_TAGX_LEAVE 0x00 +#define IAVF_AQ_VSI_CAS_PV_TAGX_REMOVE 0x01 +#define IAVF_AQ_VSI_CAS_PV_TAGX_COPY 0x02 +#define IAVF_AQ_VSI_CAS_PV_INSERT_TAG 0x10 +#define IAVF_AQ_VSI_CAS_PV_ETAG_PRUNE 0x20 +#define IAVF_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG 0x40 + u8 cas_pv_reserved; + /* queue mapping section */ + __le16 mapping_flags; +#define IAVF_AQ_VSI_QUE_MAP_CONTIG 0x0 +#define IAVF_AQ_VSI_QUE_MAP_NONCONTIG 0x1 + __le16 queue_mapping[16]; +#define IAVF_AQ_VSI_QUEUE_SHIFT 0x0 +#define IAVF_AQ_VSI_QUEUE_MASK (0x7FF << IAVF_AQ_VSI_QUEUE_SHIFT) + __le16 tc_mapping[8]; +#define IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT 0 +#define IAVF_AQ_VSI_TC_QUE_OFFSET_MASK (0x1FF << \ + IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT) +#define IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT 9 +#define IAVF_AQ_VSI_TC_QUE_NUMBER_MASK (0x7 << \ + IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT) + /* queueing option section */ + u8 queueing_opt_flags; +#define IAVF_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA 0x04 +#define IAVF_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA 0x08 +#define IAVF_AQ_VSI_QUE_OPT_TCP_ENA 0x10 +#define IAVF_AQ_VSI_QUE_OPT_FCOE_ENA 0x20 +#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00 +#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_VSI 0x40 + u8 queueing_opt_reserved[3]; + /* scheduler section */ + u8 up_enable_bits; + u8 sched_reserved; + /* outer up section */ + __le32 outer_up_table; /* same structure and defines as ingress tbl */ + u8 cmd_reserved[8]; + /* last 32 bytes are written by FW */ + __le16 qs_handle[8]; +#define IAVF_AQ_VSI_QS_HANDLE_INVALID 0xFFFF + __le16 stat_counter_idx; + __le16 sched_id; + u8 resp_reserved[12]; +}; + +IAVF_CHECK_STRUCT_LEN(128, iavf_aqc_vsi_properties_data); + +/* Get VEB Parameters (direct 0x0232) + * uses iavf_aqc_switch_seid for the descriptor + */ +struct iavf_aqc_get_veb_parameters_completion { + __le16 seid; + __le16 switch_id; + __le16 veb_flags; /* only the first/last flags from 0x0230 is valid */ + __le16 statistic_index; + __le16 vebs_used; + __le16 vebs_free; + u8 reserved[4]; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_veb_parameters_completion); + +#define IAVF_LINK_SPEED_100MB_SHIFT 0x1 +#define IAVF_LINK_SPEED_1000MB_SHIFT 0x2 +#define IAVF_LINK_SPEED_10GB_SHIFT 0x3 +#define IAVF_LINK_SPEED_40GB_SHIFT 0x4 +#define IAVF_LINK_SPEED_20GB_SHIFT 0x5 +#define IAVF_LINK_SPEED_25GB_SHIFT 0x6 + +enum iavf_aq_link_speed { + IAVF_LINK_SPEED_UNKNOWN = 0, + IAVF_LINK_SPEED_100MB = BIT(IAVF_LINK_SPEED_100MB_SHIFT), + IAVF_LINK_SPEED_1GB = BIT(IAVF_LINK_SPEED_1000MB_SHIFT), 
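/*
 * These speed values are one-hot bit positions rather than a dense
 * enumeration, and the bit order does not track the speed order (the
 * 40GB bit sits below the 20GB and 25GB bits), so numeric comparison is
 * misleading. A hypothetical decode helper (illustration only, not part
 * of this patch):
 *
 *	static u32 example_link_speed_mbps(enum iavf_aq_link_speed speed)
 *	{
 *		switch (speed) {
 *		case IAVF_LINK_SPEED_100MB:	return 100;
 *		case IAVF_LINK_SPEED_1GB:	return 1000;
 *		case IAVF_LINK_SPEED_10GB:	return 10000;
 *		case IAVF_LINK_SPEED_20GB:	return 20000;
 *		case IAVF_LINK_SPEED_25GB:	return 25000;
 *		case IAVF_LINK_SPEED_40GB:	return 40000;
 *		default:			return 0;
 *		}
 *	}
 */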
+ IAVF_LINK_SPEED_10GB = BIT(IAVF_LINK_SPEED_10GB_SHIFT), + IAVF_LINK_SPEED_40GB = BIT(IAVF_LINK_SPEED_40GB_SHIFT), + IAVF_LINK_SPEED_20GB = BIT(IAVF_LINK_SPEED_20GB_SHIFT), + IAVF_LINK_SPEED_25GB = BIT(IAVF_LINK_SPEED_25GB_SHIFT), +}; + +/* Send to PF command (indirect 0x0801) id is only used by PF + * Send to VF command (indirect 0x0802) id is only used by PF + * Send to Peer PF command (indirect 0x0803) + */ +struct iavf_aqc_pf_vf_message { + __le32 id; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_pf_vf_message); + +struct iavf_aqc_get_set_rss_key { +#define IAVF_AQC_SET_RSS_KEY_VSI_VALID BIT(15) +#define IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT 0 +#define IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK (0x3FF << \ + IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT) + __le16 vsi_id; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_key); + +struct iavf_aqc_get_set_rss_key_data { + u8 standard_rss_key[0x28]; + u8 extended_hash_key[0xc]; +}; + +IAVF_CHECK_STRUCT_LEN(0x34, iavf_aqc_get_set_rss_key_data); + +struct iavf_aqc_get_set_rss_lut { +#define IAVF_AQC_SET_RSS_LUT_VSI_VALID BIT(15) +#define IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT 0 +#define IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK (0x3FF << \ + IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT) + __le16 vsi_id; +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT 0 +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \ + BIT(IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) + +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI 0 +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1 + __le16 flags; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_lut); +#endif /* _IAVF_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_alloc.h b/drivers/net/ethernet/intel/iavf/iavf_alloc.h index bf2753146f30..2711573c14ec 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_alloc.h +++ b/drivers/net/ethernet/intel/iavf/iavf_alloc.h @@ -20,12 +20,15 @@ enum iavf_memory_type { }; /* prototype for functions used for dynamic memory allocation */ -iavf_status iavf_allocate_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem, - enum iavf_memory_type type, - u64 size, u32 alignment); -iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem); -iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, - struct iavf_virt_mem *mem, u32 size); -iavf_status iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem); +enum iavf_status iavf_allocate_dma_mem(struct iavf_hw *hw, + struct iavf_dma_mem *mem, + enum iavf_memory_type type, + u64 size, u32 alignment); +enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, + struct iavf_dma_mem *mem); +enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, + struct iavf_virt_mem *mem, u32 size); +enum iavf_status iavf_free_virt_mem(struct iavf_hw *hw, + struct iavf_virt_mem *mem); #endif /* _IAVF_ALLOC_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.c b/drivers/net/ethernet/intel/iavf/iavf_client.c index aea45364fd1c..0c77e4171808 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_client.c +++ b/drivers/net/ethernet/intel/iavf/iavf_client.c @@ -10,19 +10,19 @@ static const char iavf_client_interface_version_str[] = IAVF_CLIENT_VERSION_STR; -static struct i40e_client *vf_registered_client; -static LIST_HEAD(i40e_devices); +static struct iavf_client *vf_registered_client; +static LIST_HEAD(iavf_devices); static DEFINE_MUTEX(iavf_device_mutex); -static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, - 
struct i40e_client *client, +static u32 iavf_client_virtchnl_send(struct iavf_info *ldev, + struct iavf_client *client, u8 *msg, u16 len); -static int iavf_client_setup_qvlist(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_qvlist_info *qvlist_info); +static int iavf_client_setup_qvlist(struct iavf_info *ldev, + struct iavf_client *client, + struct iavf_qvlist_info *qvlist_info); -static struct i40e_ops iavf_lan_ops = { +static struct iavf_ops iavf_lan_ops = { .virtchnl_send = iavf_client_virtchnl_send, .setup_qvlist = iavf_client_setup_qvlist, }; @@ -33,11 +33,11 @@ static struct i40e_ops iavf_lan_ops = { * @params: client param struct **/ static -void iavf_client_get_params(struct iavf_vsi *vsi, struct i40e_params *params) +void iavf_client_get_params(struct iavf_vsi *vsi, struct iavf_params *params) { int i; - memset(params, 0, sizeof(struct i40e_params)); + memset(params, 0, sizeof(struct iavf_params)); params->mtu = vsi->netdev->mtu; params->link_up = vsi->back->link_up; @@ -57,7 +57,7 @@ void iavf_client_get_params(struct iavf_vsi *vsi, struct i40e_params *params) **/ void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len) { - struct i40e_client_instance *cinst; + struct iavf_client_instance *cinst; if (!vsi) return; @@ -81,8 +81,8 @@ void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len) **/ void iavf_notify_client_l2_params(struct iavf_vsi *vsi) { - struct i40e_client_instance *cinst; - struct i40e_params params; + struct iavf_client_instance *cinst; + struct iavf_params params; if (!vsi) return; @@ -110,7 +110,7 @@ void iavf_notify_client_l2_params(struct iavf_vsi *vsi) void iavf_notify_client_open(struct iavf_vsi *vsi) { struct iavf_adapter *adapter = vsi->back; - struct i40e_client_instance *cinst = adapter->cinst; + struct iavf_client_instance *cinst = adapter->cinst; int ret; if (!cinst || !cinst->client || !cinst->client->ops || @@ -119,10 +119,10 @@ void iavf_notify_client_open(struct iavf_vsi *vsi) "Cannot locate client instance open function\n"); return; } - if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state))) { + if (!(test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state))) { ret = cinst->client->ops->open(&cinst->lan_info, cinst->client); if (!ret) - set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); } } @@ -132,17 +132,17 @@ void iavf_notify_client_open(struct iavf_vsi *vsi) * * Return 0 on success or < 0 on error **/ -static int iavf_client_release_qvlist(struct i40e_info *ldev) +static int iavf_client_release_qvlist(struct iavf_info *ldev) { struct iavf_adapter *adapter = ldev->vf; - iavf_status err; + enum iavf_status err; if (adapter->aq_required) return -EAGAIN; err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, - I40E_SUCCESS, NULL, 0, NULL); + IAVF_SUCCESS, NULL, 0, NULL); if (err) dev_err(&adapter->pdev->dev, @@ -162,7 +162,7 @@ static int iavf_client_release_qvlist(struct i40e_info *ldev) void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset) { struct iavf_adapter *adapter = vsi->back; - struct i40e_client_instance *cinst = adapter->cinst; + struct iavf_client_instance *cinst = adapter->cinst; if (!cinst || !cinst->client || !cinst->client->ops || !cinst->client->ops->close) { @@ -172,7 +172,7 @@ void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset) } cinst->client->ops->close(&cinst->lan_info, cinst->client, reset); iavf_client_release_qvlist(&cinst->lan_info); - 
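/*
 * The client notifications here are gated by one atomic state bit:
 * __IAVF_CLIENT_INSTANCE_OPENED is set only after a successful
 * client->ops->open() and cleared again on close, so the open and close
 * paths in this file reduce to the usual kernel bitop pattern
 * (condensed illustration, error handling elided):
 *
 *	if (!test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) {
 *		if (!cinst->client->ops->open(&cinst->lan_info,
 *					      cinst->client))
 *			set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
 *	}
 *	...
 *	cinst->client->ops->close(&cinst->lan_info, cinst->client, reset);
 *	clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
 */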
clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); } /** @@ -181,13 +181,13 @@ void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset) * * Returns cinst ptr on success, NULL on failure **/ -static struct i40e_client_instance * +static struct iavf_client_instance * iavf_client_add_instance(struct iavf_adapter *adapter) { - struct i40e_client_instance *cinst = NULL; + struct iavf_client_instance *cinst = NULL; struct iavf_vsi *vsi = &adapter->vsi; struct netdev_hw_addr *mac = NULL; - struct i40e_params params; + struct iavf_params params; if (!vf_registered_client) goto out; @@ -205,7 +205,7 @@ iavf_client_add_instance(struct iavf_adapter *adapter) cinst->lan_info.netdev = vsi->netdev; cinst->lan_info.pcidev = adapter->pdev; cinst->lan_info.fid = 0; - cinst->lan_info.ftype = I40E_CLIENT_FTYPE_VF; + cinst->lan_info.ftype = IAVF_CLIENT_FTYPE_VF; cinst->lan_info.hw_addr = adapter->hw.hw_addr; cinst->lan_info.ops = &iavf_lan_ops; cinst->lan_info.version.major = IAVF_CLIENT_VERSION_MAJOR; @@ -213,7 +213,7 @@ iavf_client_add_instance(struct iavf_adapter *adapter) cinst->lan_info.version.build = IAVF_CLIENT_VERSION_BUILD; iavf_client_get_params(vsi, ¶ms); cinst->lan_info.params = params; - set_bit(__I40E_CLIENT_INSTANCE_NONE, &cinst->state); + set_bit(__IAVF_CLIENT_INSTANCE_NONE, &cinst->state); cinst->lan_info.msix_count = adapter->num_iwarp_msix; cinst->lan_info.msix_entries = @@ -250,8 +250,8 @@ void iavf_client_del_instance(struct iavf_adapter *adapter) **/ void iavf_client_subtask(struct iavf_adapter *adapter) { - struct i40e_client *client = vf_registered_client; - struct i40e_client_instance *cinst; + struct iavf_client *client = vf_registered_client; + struct iavf_client_instance *cinst; int ret = 0; if (adapter->state < __IAVF_DOWN) @@ -269,13 +269,13 @@ void iavf_client_subtask(struct iavf_adapter *adapter) dev_info(&adapter->pdev->dev, "Added instance of Client %s\n", client->name); - if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) { + if (!test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) { /* Send an Open request to the client */ if (client->ops && client->ops->open) ret = client->ops->open(&cinst->lan_info, client); if (!ret) - set_bit(__I40E_CLIENT_INSTANCE_OPENED, + set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); else /* remove client instance */ @@ -291,11 +291,11 @@ void iavf_client_subtask(struct iavf_adapter *adapter) **/ int iavf_lan_add_device(struct iavf_adapter *adapter) { - struct i40e_device *ldev; + struct iavf_device *ldev; int ret = 0; mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &i40e_devices, list) { + list_for_each_entry(ldev, &iavf_devices, list) { if (ldev->vf == adapter) { ret = -EEXIST; goto out; @@ -308,7 +308,7 @@ int iavf_lan_add_device(struct iavf_adapter *adapter) } ldev->vf = adapter; INIT_LIST_HEAD(&ldev->list); - list_add(&ldev->list, &i40e_devices); + list_add(&ldev->list, &iavf_devices); dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n", adapter->hw.bus.bus_id, adapter->hw.bus.device, adapter->hw.bus.func); @@ -331,11 +331,11 @@ out: **/ int iavf_lan_del_device(struct iavf_adapter *adapter) { - struct i40e_device *ldev, *tmp; + struct iavf_device *ldev, *tmp; int ret = -ENODEV; mutex_lock(&iavf_device_mutex); - list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) { + list_for_each_entry_safe(ldev, tmp, &iavf_devices, list) { if (ldev->vf == adapter) { dev_info(&adapter->pdev->dev, "Deleted LAN device 
bus=0x%02x dev=0x%02x func=0x%02x\n", @@ -357,24 +357,24 @@ int iavf_lan_del_device(struct iavf_adapter *adapter) * @client: pointer to the registered client * **/ -static void iavf_client_release(struct i40e_client *client) +static void iavf_client_release(struct iavf_client *client) { - struct i40e_client_instance *cinst; - struct i40e_device *ldev; + struct iavf_client_instance *cinst; + struct iavf_device *ldev; struct iavf_adapter *adapter; mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &i40e_devices, list) { + list_for_each_entry(ldev, &iavf_devices, list) { adapter = ldev->vf; cinst = adapter->cinst; if (!cinst) continue; - if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) { + if (test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) { if (client->ops && client->ops->close) client->ops->close(&cinst->lan_info, client, false); iavf_client_release_qvlist(&cinst->lan_info); - clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); dev_warn(&adapter->pdev->dev, "Client %s instance closed\n", client->name); @@ -392,13 +392,13 @@ static void iavf_client_release(struct i40e_client *client) * @client: pointer to the registered client * **/ -static void iavf_client_prepare(struct i40e_client *client) +static void iavf_client_prepare(struct iavf_client *client) { - struct i40e_device *ldev; + struct iavf_device *ldev; struct iavf_adapter *adapter; mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &i40e_devices, list) { + list_for_each_entry(ldev, &iavf_devices, list) { adapter = ldev->vf; /* Signal the watchdog to service the client */ adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; @@ -415,18 +415,18 @@ static void iavf_client_prepare(struct i40e_client *client) * * Return 0 on success or < 0 on error **/ -static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, - struct i40e_client *client, +static u32 iavf_client_virtchnl_send(struct iavf_info *ldev, + struct iavf_client *client, u8 *msg, u16 len) { struct iavf_adapter *adapter = ldev->vf; - iavf_status err; + enum iavf_status err; if (adapter->aq_required) return -EAGAIN; err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_IWARP, - I40E_SUCCESS, msg, len, NULL); + IAVF_SUCCESS, msg, len, NULL); if (err) dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n", err, adapter->hw.aq.asq_last_status); @@ -442,16 +442,16 @@ static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, * * Return 0 on success or < 0 on error **/ -static int iavf_client_setup_qvlist(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_qvlist_info *qvlist_info) +static int iavf_client_setup_qvlist(struct iavf_info *ldev, + struct iavf_client *client, + struct iavf_qvlist_info *qvlist_info) { struct virtchnl_iwarp_qvlist_info *v_qvlist_info; struct iavf_adapter *adapter = ldev->vf; - struct i40e_qv_info *qv_info; - iavf_status err; + struct iavf_qv_info *qv_info; + enum iavf_status err; u32 v_idx, i; - u32 msg_size; + size_t msg_size; if (adapter->aq_required) return -EAGAIN; @@ -469,13 +469,12 @@ static int iavf_client_setup_qvlist(struct i40e_info *ldev, } v_qvlist_info = (struct virtchnl_iwarp_qvlist_info *)qvlist_info; - msg_size = sizeof(struct virtchnl_iwarp_qvlist_info) + - (sizeof(struct virtchnl_iwarp_qv_info) * - (v_qvlist_info->num_vectors - 1)); + msg_size = struct_size(v_qvlist_info, qv_info, + v_qvlist_info->num_vectors - 1); adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP); err = 
iavf_aq_send_msg_to_pf(&adapter->hw, - VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, I40E_SUCCESS, + VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, IAVF_SUCCESS, (u8 *)v_qvlist_info, msg_size, NULL); if (err) { @@ -499,12 +498,12 @@ out: } /** - * iavf_register_client - Register a i40e client driver with the L2 driver - * @client: pointer to the i40e_client struct + * iavf_register_client - Register an iavf client driver with the L2 driver + * @client: pointer to the iavf_client struct * * Returns 0 on success or non-0 on error **/ -int iavf_register_client(struct i40e_client *client) +int iavf_register_client(struct iavf_client *client) { int ret = 0; @@ -550,12 +549,12 @@ out: EXPORT_SYMBOL(iavf_register_client); /** - * iavf_unregister_client - Unregister a i40e client driver with the L2 driver - * @client: pointer to the i40e_client struct + * iavf_unregister_client - Unregister an iavf client driver with the L2 driver + * @client: pointer to the iavf_client struct * * Returns 0 on success or non-0 on error **/ -int iavf_unregister_client(struct i40e_client *client) +int iavf_unregister_client(struct iavf_client *client) { int ret = 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.h b/drivers/net/ethernet/intel/iavf/iavf_client.h index e216fc9dfd81..9a7cf39ea75a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_client.h +++ b/drivers/net/ethernet/intel/iavf/iavf_client.h @@ -17,86 +17,86 @@ __stringify(IAVF_CLIENT_VERSION_MINOR) "." \ __stringify(IAVF_CLIENT_VERSION_BUILD) -struct i40e_client_version { +struct iavf_client_version { u8 major; u8 minor; u8 build; u8 rsvd; }; -enum i40e_client_state { - __I40E_CLIENT_NULL, - __I40E_CLIENT_REGISTERED +enum iavf_client_state { + __IAVF_CLIENT_NULL, + __IAVF_CLIENT_REGISTERED }; -enum i40e_client_instance_state { - __I40E_CLIENT_INSTANCE_NONE, - __I40E_CLIENT_INSTANCE_OPENED, +enum iavf_client_instance_state { + __IAVF_CLIENT_INSTANCE_NONE, + __IAVF_CLIENT_INSTANCE_OPENED, }; -struct i40e_ops; -struct i40e_client; +struct iavf_ops; +struct iavf_client; /* HW does not define a type value for AEQ; only for RX/TX and CEQ. * In order for us to keep the interface simple, SW will define a * unique type value for AEQ. 
*/ -#define I40E_QUEUE_TYPE_PE_AEQ 0x80 -#define I40E_QUEUE_INVALID_IDX 0xFFFF +#define IAVF_QUEUE_TYPE_PE_AEQ 0x80 +#define IAVF_QUEUE_INVALID_IDX 0xFFFF -struct i40e_qv_info { +struct iavf_qv_info { u32 v_idx; /* msix_vector */ u16 ceq_idx; u16 aeq_idx; u8 itr_idx; }; -struct i40e_qvlist_info { +struct iavf_qvlist_info { u32 num_vectors; - struct i40e_qv_info qv_info[1]; + struct iavf_qv_info qv_info[1]; }; -#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF +#define IAVF_CLIENT_MSIX_ALL 0xFFFFFFFF /* set of LAN parameters useful for clients managed by LAN */ /* Struct to hold per priority info */ -struct i40e_prio_qos_params { +struct iavf_prio_qos_params { u16 qs_handle; /* qs handle for prio */ u8 tc; /* TC mapped to prio */ u8 reserved; }; -#define I40E_CLIENT_MAX_USER_PRIORITY 8 +#define IAVF_CLIENT_MAX_USER_PRIORITY 8 /* Struct to hold Client QoS */ -struct i40e_qos_params { - struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY]; +struct iavf_qos_params { + struct iavf_prio_qos_params prio_qos[IAVF_CLIENT_MAX_USER_PRIORITY]; }; -struct i40e_params { - struct i40e_qos_params qos; +struct iavf_params { + struct iavf_qos_params qos; u16 mtu; u16 link_up; /* boolean */ }; /* Structure to hold LAN device info for a client device */ -struct i40e_info { - struct i40e_client_version version; +struct iavf_info { + struct iavf_client_version version; u8 lanmac[6]; struct net_device *netdev; struct pci_dev *pcidev; u8 __iomem *hw_addr; u8 fid; /* function id, PF id or VF id */ -#define I40E_CLIENT_FTYPE_PF 0 -#define I40E_CLIENT_FTYPE_VF 1 +#define IAVF_CLIENT_FTYPE_PF 0 +#define IAVF_CLIENT_FTYPE_VF 1 u8 ftype; /* function type, PF or VF */ void *vf; /* cast to iavf_adapter */ /* All L2 params that could change during the life span of the device * and needs to be communicated to the client when they change */ - struct i40e_params params; - struct i40e_ops *ops; + struct iavf_params params; + struct iavf_ops *ops; u16 msix_count; /* number of msix vectors*/ /* Array down below will be dynamically allocated based on msix_count */ @@ -104,66 +104,66 @@ struct i40e_info { u16 itr_index; /* Which ITR index the PE driver is supposed to use */ }; -struct i40e_ops { +struct iavf_ops { /* setup_q_vector_list enables queues with a particular vector */ - int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client, - struct i40e_qvlist_info *qv_info); + int (*setup_qvlist)(struct iavf_info *ldev, struct iavf_client *client, + struct iavf_qvlist_info *qv_info); - u32 (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client, + u32 (*virtchnl_send)(struct iavf_info *ldev, struct iavf_client *client, u8 *msg, u16 len); /* If the PE Engine is unresponsive, RDMA driver can request a reset.*/ - void (*request_reset)(struct i40e_info *ldev, - struct i40e_client *client); + void (*request_reset)(struct iavf_info *ldev, + struct iavf_client *client); }; -struct i40e_client_ops { +struct iavf_client_ops { /* Should be called from register_client() or whenever the driver is * ready to create a specific client instance. */ - int (*open)(struct i40e_info *ldev, struct i40e_client *client); + int (*open)(struct iavf_info *ldev, struct iavf_client *client); /* Should be closed when netdev is unavailable or when unregister * call comes in. If the close happens due to a reset, set the reset * bit to true. 
*/ - void (*close)(struct i40e_info *ldev, struct i40e_client *client, + void (*close)(struct iavf_info *ldev, struct iavf_client *client, bool reset); /* called when some l2 managed parameters changes - mss */ - void (*l2_param_change)(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_params *params); + void (*l2_param_change)(struct iavf_info *ldev, + struct iavf_client *client, + struct iavf_params *params); /* called when a message is received from the PF */ - int (*virtchnl_receive)(struct i40e_info *ldev, - struct i40e_client *client, + int (*virtchnl_receive)(struct iavf_info *ldev, + struct iavf_client *client, u8 *msg, u16 len); }; /* Client device */ -struct i40e_client_instance { +struct iavf_client_instance { struct list_head list; - struct i40e_info lan_info; - struct i40e_client *client; + struct iavf_info lan_info; + struct iavf_client *client; unsigned long state; }; -struct i40e_client { +struct iavf_client { struct list_head list; /* list of registered clients */ char name[IAVF_CLIENT_STR_LENGTH]; - struct i40e_client_version version; + struct iavf_client_version version; unsigned long state; /* client state */ atomic_t ref_cnt; /* Count of all the client devices of this kind */ u32 flags; -#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) -#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) +#define IAVF_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) +#define IAVF_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) u8 type; -#define I40E_CLIENT_IWARP 0 - struct i40e_client_ops *ops; /* client ops provided by the client */ +#define IAVF_CLIENT_IWARP 0 + struct iavf_client_ops *ops; /* client ops provided by the client */ }; /* used by clients */ -int iavf_register_client(struct i40e_client *client); -int iavf_unregister_client(struct i40e_client *client); +int iavf_register_client(struct iavf_client *client); +int iavf_unregister_client(struct iavf_client *client); #endif /* _IAVF_CLIENT_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 768369c89e77..8547fc8fdfd6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -2,7 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "iavf_type.h" -#include "i40e_adminq.h" +#include "iavf_adminq.h" #include "iavf_prototype.h" #include <linux/avf/virtchnl.h> @@ -13,9 +13,9 @@ * This function sets the mac type of the adapter based on the * vendor ID and device ID stored in the hw structure. 
**/ -iavf_status iavf_set_mac_type(struct iavf_hw *hw) +enum iavf_status iavf_set_mac_type(struct iavf_hw *hw) { - iavf_status status = 0; + enum iavf_status status = 0; if (hw->vendor_id == PCI_VENDOR_ID_INTEL) { switch (hw->device_id) { @@ -32,7 +32,7 @@ iavf_status iavf_set_mac_type(struct iavf_hw *hw) break; } } else { - status = I40E_ERR_DEVICE_NOT_SUPPORTED; + status = IAVF_ERR_DEVICE_NOT_SUPPORTED; } hw_dbg(hw, "found mac: %d, returns: %d\n", hw->mac.type, status); @@ -44,55 +44,55 @@ iavf_status iavf_set_mac_type(struct iavf_hw *hw) * @hw: pointer to the HW structure * @aq_err: the AQ error code to convert **/ -const char *iavf_aq_str(struct iavf_hw *hw, enum i40e_admin_queue_err aq_err) +const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err) { switch (aq_err) { - case I40E_AQ_RC_OK: + case IAVF_AQ_RC_OK: return "OK"; - case I40E_AQ_RC_EPERM: - return "I40E_AQ_RC_EPERM"; - case I40E_AQ_RC_ENOENT: - return "I40E_AQ_RC_ENOENT"; - case I40E_AQ_RC_ESRCH: - return "I40E_AQ_RC_ESRCH"; - case I40E_AQ_RC_EINTR: - return "I40E_AQ_RC_EINTR"; - case I40E_AQ_RC_EIO: - return "I40E_AQ_RC_EIO"; - case I40E_AQ_RC_ENXIO: - return "I40E_AQ_RC_ENXIO"; - case I40E_AQ_RC_E2BIG: - return "I40E_AQ_RC_E2BIG"; - case I40E_AQ_RC_EAGAIN: - return "I40E_AQ_RC_EAGAIN"; - case I40E_AQ_RC_ENOMEM: - return "I40E_AQ_RC_ENOMEM"; - case I40E_AQ_RC_EACCES: - return "I40E_AQ_RC_EACCES"; - case I40E_AQ_RC_EFAULT: - return "I40E_AQ_RC_EFAULT"; - case I40E_AQ_RC_EBUSY: - return "I40E_AQ_RC_EBUSY"; - case I40E_AQ_RC_EEXIST: - return "I40E_AQ_RC_EEXIST"; - case I40E_AQ_RC_EINVAL: - return "I40E_AQ_RC_EINVAL"; - case I40E_AQ_RC_ENOTTY: - return "I40E_AQ_RC_ENOTTY"; - case I40E_AQ_RC_ENOSPC: - return "I40E_AQ_RC_ENOSPC"; - case I40E_AQ_RC_ENOSYS: - return "I40E_AQ_RC_ENOSYS"; - case I40E_AQ_RC_ERANGE: - return "I40E_AQ_RC_ERANGE"; - case I40E_AQ_RC_EFLUSHED: - return "I40E_AQ_RC_EFLUSHED"; - case I40E_AQ_RC_BAD_ADDR: - return "I40E_AQ_RC_BAD_ADDR"; - case I40E_AQ_RC_EMODE: - return "I40E_AQ_RC_EMODE"; - case I40E_AQ_RC_EFBIG: - return "I40E_AQ_RC_EFBIG"; + case IAVF_AQ_RC_EPERM: + return "IAVF_AQ_RC_EPERM"; + case IAVF_AQ_RC_ENOENT: + return "IAVF_AQ_RC_ENOENT"; + case IAVF_AQ_RC_ESRCH: + return "IAVF_AQ_RC_ESRCH"; + case IAVF_AQ_RC_EINTR: + return "IAVF_AQ_RC_EINTR"; + case IAVF_AQ_RC_EIO: + return "IAVF_AQ_RC_EIO"; + case IAVF_AQ_RC_ENXIO: + return "IAVF_AQ_RC_ENXIO"; + case IAVF_AQ_RC_E2BIG: + return "IAVF_AQ_RC_E2BIG"; + case IAVF_AQ_RC_EAGAIN: + return "IAVF_AQ_RC_EAGAIN"; + case IAVF_AQ_RC_ENOMEM: + return "IAVF_AQ_RC_ENOMEM"; + case IAVF_AQ_RC_EACCES: + return "IAVF_AQ_RC_EACCES"; + case IAVF_AQ_RC_EFAULT: + return "IAVF_AQ_RC_EFAULT"; + case IAVF_AQ_RC_EBUSY: + return "IAVF_AQ_RC_EBUSY"; + case IAVF_AQ_RC_EEXIST: + return "IAVF_AQ_RC_EEXIST"; + case IAVF_AQ_RC_EINVAL: + return "IAVF_AQ_RC_EINVAL"; + case IAVF_AQ_RC_ENOTTY: + return "IAVF_AQ_RC_ENOTTY"; + case IAVF_AQ_RC_ENOSPC: + return "IAVF_AQ_RC_ENOSPC"; + case IAVF_AQ_RC_ENOSYS: + return "IAVF_AQ_RC_ENOSYS"; + case IAVF_AQ_RC_ERANGE: + return "IAVF_AQ_RC_ERANGE"; + case IAVF_AQ_RC_EFLUSHED: + return "IAVF_AQ_RC_EFLUSHED"; + case IAVF_AQ_RC_BAD_ADDR: + return "IAVF_AQ_RC_BAD_ADDR"; + case IAVF_AQ_RC_EMODE: + return "IAVF_AQ_RC_EMODE"; + case IAVF_AQ_RC_EFBIG: + return "IAVF_AQ_RC_EFBIG"; } snprintf(hw->err_str, sizeof(hw->err_str), "%d", aq_err); @@ -104,143 +104,143 @@ const char *iavf_aq_str(struct iavf_hw *hw, enum i40e_admin_queue_err aq_err) * @hw: pointer to the HW structure * @stat_err: the status error code to convert **/ -const 
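/*
 * Every case in the switch below simply returns the stringified
 * enumerator. A stringify macro is a common way to keep such a table
 * from drifting out of sync with the enum (shown for comparison only;
 * this driver keeps the explicit cases):
 *
 *	#define IAVF_STAT_STR_CASE(e)	case e: return #e
 *
 *	switch (stat_err) {
 *	IAVF_STAT_STR_CASE(IAVF_ERR_NVM);
 *	IAVF_STAT_STR_CASE(IAVF_ERR_PARAM);
 *	...
 *	}
 */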
char *iavf_stat_str(struct iavf_hw *hw, iavf_status stat_err) +const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err) { switch (stat_err) { case 0: return "OK"; - case I40E_ERR_NVM: - return "I40E_ERR_NVM"; - case I40E_ERR_NVM_CHECKSUM: - return "I40E_ERR_NVM_CHECKSUM"; - case I40E_ERR_PHY: - return "I40E_ERR_PHY"; - case I40E_ERR_CONFIG: - return "I40E_ERR_CONFIG"; - case I40E_ERR_PARAM: - return "I40E_ERR_PARAM"; - case I40E_ERR_MAC_TYPE: - return "I40E_ERR_MAC_TYPE"; - case I40E_ERR_UNKNOWN_PHY: - return "I40E_ERR_UNKNOWN_PHY"; - case I40E_ERR_LINK_SETUP: - return "I40E_ERR_LINK_SETUP"; - case I40E_ERR_ADAPTER_STOPPED: - return "I40E_ERR_ADAPTER_STOPPED"; - case I40E_ERR_INVALID_MAC_ADDR: - return "I40E_ERR_INVALID_MAC_ADDR"; - case I40E_ERR_DEVICE_NOT_SUPPORTED: - return "I40E_ERR_DEVICE_NOT_SUPPORTED"; - case I40E_ERR_MASTER_REQUESTS_PENDING: - return "I40E_ERR_MASTER_REQUESTS_PENDING"; - case I40E_ERR_INVALID_LINK_SETTINGS: - return "I40E_ERR_INVALID_LINK_SETTINGS"; - case I40E_ERR_AUTONEG_NOT_COMPLETE: - return "I40E_ERR_AUTONEG_NOT_COMPLETE"; - case I40E_ERR_RESET_FAILED: - return "I40E_ERR_RESET_FAILED"; - case I40E_ERR_SWFW_SYNC: - return "I40E_ERR_SWFW_SYNC"; - case I40E_ERR_NO_AVAILABLE_VSI: - return "I40E_ERR_NO_AVAILABLE_VSI"; - case I40E_ERR_NO_MEMORY: - return "I40E_ERR_NO_MEMORY"; - case I40E_ERR_BAD_PTR: - return "I40E_ERR_BAD_PTR"; - case I40E_ERR_RING_FULL: - return "I40E_ERR_RING_FULL"; - case I40E_ERR_INVALID_PD_ID: - return "I40E_ERR_INVALID_PD_ID"; - case I40E_ERR_INVALID_QP_ID: - return "I40E_ERR_INVALID_QP_ID"; - case I40E_ERR_INVALID_CQ_ID: - return "I40E_ERR_INVALID_CQ_ID"; - case I40E_ERR_INVALID_CEQ_ID: - return "I40E_ERR_INVALID_CEQ_ID"; - case I40E_ERR_INVALID_AEQ_ID: - return "I40E_ERR_INVALID_AEQ_ID"; - case I40E_ERR_INVALID_SIZE: - return "I40E_ERR_INVALID_SIZE"; - case I40E_ERR_INVALID_ARP_INDEX: - return "I40E_ERR_INVALID_ARP_INDEX"; - case I40E_ERR_INVALID_FPM_FUNC_ID: - return "I40E_ERR_INVALID_FPM_FUNC_ID"; - case I40E_ERR_QP_INVALID_MSG_SIZE: - return "I40E_ERR_QP_INVALID_MSG_SIZE"; - case I40E_ERR_QP_TOOMANY_WRS_POSTED: - return "I40E_ERR_QP_TOOMANY_WRS_POSTED"; - case I40E_ERR_INVALID_FRAG_COUNT: - return "I40E_ERR_INVALID_FRAG_COUNT"; - case I40E_ERR_QUEUE_EMPTY: - return "I40E_ERR_QUEUE_EMPTY"; - case I40E_ERR_INVALID_ALIGNMENT: - return "I40E_ERR_INVALID_ALIGNMENT"; - case I40E_ERR_FLUSHED_QUEUE: - return "I40E_ERR_FLUSHED_QUEUE"; - case I40E_ERR_INVALID_PUSH_PAGE_INDEX: - return "I40E_ERR_INVALID_PUSH_PAGE_INDEX"; - case I40E_ERR_INVALID_IMM_DATA_SIZE: - return "I40E_ERR_INVALID_IMM_DATA_SIZE"; - case I40E_ERR_TIMEOUT: - return "I40E_ERR_TIMEOUT"; - case I40E_ERR_OPCODE_MISMATCH: - return "I40E_ERR_OPCODE_MISMATCH"; - case I40E_ERR_CQP_COMPL_ERROR: - return "I40E_ERR_CQP_COMPL_ERROR"; - case I40E_ERR_INVALID_VF_ID: - return "I40E_ERR_INVALID_VF_ID"; - case I40E_ERR_INVALID_HMCFN_ID: - return "I40E_ERR_INVALID_HMCFN_ID"; - case I40E_ERR_BACKING_PAGE_ERROR: - return "I40E_ERR_BACKING_PAGE_ERROR"; - case I40E_ERR_NO_PBLCHUNKS_AVAILABLE: - return "I40E_ERR_NO_PBLCHUNKS_AVAILABLE"; - case I40E_ERR_INVALID_PBLE_INDEX: - return "I40E_ERR_INVALID_PBLE_INDEX"; - case I40E_ERR_INVALID_SD_INDEX: - return "I40E_ERR_INVALID_SD_INDEX"; - case I40E_ERR_INVALID_PAGE_DESC_INDEX: - return "I40E_ERR_INVALID_PAGE_DESC_INDEX"; - case I40E_ERR_INVALID_SD_TYPE: - return "I40E_ERR_INVALID_SD_TYPE"; - case I40E_ERR_MEMCPY_FAILED: - return "I40E_ERR_MEMCPY_FAILED"; - case I40E_ERR_INVALID_HMC_OBJ_INDEX: - return "I40E_ERR_INVALID_HMC_OBJ_INDEX"; - 
case I40E_ERR_INVALID_HMC_OBJ_COUNT: - return "I40E_ERR_INVALID_HMC_OBJ_COUNT"; - case I40E_ERR_INVALID_SRQ_ARM_LIMIT: - return "I40E_ERR_INVALID_SRQ_ARM_LIMIT"; - case I40E_ERR_SRQ_ENABLED: - return "I40E_ERR_SRQ_ENABLED"; - case I40E_ERR_ADMIN_QUEUE_ERROR: - return "I40E_ERR_ADMIN_QUEUE_ERROR"; - case I40E_ERR_ADMIN_QUEUE_TIMEOUT: - return "I40E_ERR_ADMIN_QUEUE_TIMEOUT"; - case I40E_ERR_BUF_TOO_SHORT: - return "I40E_ERR_BUF_TOO_SHORT"; - case I40E_ERR_ADMIN_QUEUE_FULL: - return "I40E_ERR_ADMIN_QUEUE_FULL"; - case I40E_ERR_ADMIN_QUEUE_NO_WORK: - return "I40E_ERR_ADMIN_QUEUE_NO_WORK"; - case I40E_ERR_BAD_IWARP_CQE: - return "I40E_ERR_BAD_IWARP_CQE"; - case I40E_ERR_NVM_BLANK_MODE: - return "I40E_ERR_NVM_BLANK_MODE"; - case I40E_ERR_NOT_IMPLEMENTED: - return "I40E_ERR_NOT_IMPLEMENTED"; - case I40E_ERR_PE_DOORBELL_NOT_ENABLED: - return "I40E_ERR_PE_DOORBELL_NOT_ENABLED"; - case I40E_ERR_DIAG_TEST_FAILED: - return "I40E_ERR_DIAG_TEST_FAILED"; - case I40E_ERR_NOT_READY: - return "I40E_ERR_NOT_READY"; - case I40E_NOT_SUPPORTED: - return "I40E_NOT_SUPPORTED"; - case I40E_ERR_FIRMWARE_API_VERSION: - return "I40E_ERR_FIRMWARE_API_VERSION"; - case I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR: - return "I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR"; + case IAVF_ERR_NVM: + return "IAVF_ERR_NVM"; + case IAVF_ERR_NVM_CHECKSUM: + return "IAVF_ERR_NVM_CHECKSUM"; + case IAVF_ERR_PHY: + return "IAVF_ERR_PHY"; + case IAVF_ERR_CONFIG: + return "IAVF_ERR_CONFIG"; + case IAVF_ERR_PARAM: + return "IAVF_ERR_PARAM"; + case IAVF_ERR_MAC_TYPE: + return "IAVF_ERR_MAC_TYPE"; + case IAVF_ERR_UNKNOWN_PHY: + return "IAVF_ERR_UNKNOWN_PHY"; + case IAVF_ERR_LINK_SETUP: + return "IAVF_ERR_LINK_SETUP"; + case IAVF_ERR_ADAPTER_STOPPED: + return "IAVF_ERR_ADAPTER_STOPPED"; + case IAVF_ERR_INVALID_MAC_ADDR: + return "IAVF_ERR_INVALID_MAC_ADDR"; + case IAVF_ERR_DEVICE_NOT_SUPPORTED: + return "IAVF_ERR_DEVICE_NOT_SUPPORTED"; + case IAVF_ERR_MASTER_REQUESTS_PENDING: + return "IAVF_ERR_MASTER_REQUESTS_PENDING"; + case IAVF_ERR_INVALID_LINK_SETTINGS: + return "IAVF_ERR_INVALID_LINK_SETTINGS"; + case IAVF_ERR_AUTONEG_NOT_COMPLETE: + return "IAVF_ERR_AUTONEG_NOT_COMPLETE"; + case IAVF_ERR_RESET_FAILED: + return "IAVF_ERR_RESET_FAILED"; + case IAVF_ERR_SWFW_SYNC: + return "IAVF_ERR_SWFW_SYNC"; + case IAVF_ERR_NO_AVAILABLE_VSI: + return "IAVF_ERR_NO_AVAILABLE_VSI"; + case IAVF_ERR_NO_MEMORY: + return "IAVF_ERR_NO_MEMORY"; + case IAVF_ERR_BAD_PTR: + return "IAVF_ERR_BAD_PTR"; + case IAVF_ERR_RING_FULL: + return "IAVF_ERR_RING_FULL"; + case IAVF_ERR_INVALID_PD_ID: + return "IAVF_ERR_INVALID_PD_ID"; + case IAVF_ERR_INVALID_QP_ID: + return "IAVF_ERR_INVALID_QP_ID"; + case IAVF_ERR_INVALID_CQ_ID: + return "IAVF_ERR_INVALID_CQ_ID"; + case IAVF_ERR_INVALID_CEQ_ID: + return "IAVF_ERR_INVALID_CEQ_ID"; + case IAVF_ERR_INVALID_AEQ_ID: + return "IAVF_ERR_INVALID_AEQ_ID"; + case IAVF_ERR_INVALID_SIZE: + return "IAVF_ERR_INVALID_SIZE"; + case IAVF_ERR_INVALID_ARP_INDEX: + return "IAVF_ERR_INVALID_ARP_INDEX"; + case IAVF_ERR_INVALID_FPM_FUNC_ID: + return "IAVF_ERR_INVALID_FPM_FUNC_ID"; + case IAVF_ERR_QP_INVALID_MSG_SIZE: + return "IAVF_ERR_QP_INVALID_MSG_SIZE"; + case IAVF_ERR_QP_TOOMANY_WRS_POSTED: + return "IAVF_ERR_QP_TOOMANY_WRS_POSTED"; + case IAVF_ERR_INVALID_FRAG_COUNT: + return "IAVF_ERR_INVALID_FRAG_COUNT"; + case IAVF_ERR_QUEUE_EMPTY: + return "IAVF_ERR_QUEUE_EMPTY"; + case IAVF_ERR_INVALID_ALIGNMENT: + return "IAVF_ERR_INVALID_ALIGNMENT"; + case IAVF_ERR_FLUSHED_QUEUE: + return "IAVF_ERR_FLUSHED_QUEUE"; + case IAVF_ERR_INVALID_PUSH_PAGE_INDEX: + return 
"IAVF_ERR_INVALID_PUSH_PAGE_INDEX"; + case IAVF_ERR_INVALID_IMM_DATA_SIZE: + return "IAVF_ERR_INVALID_IMM_DATA_SIZE"; + case IAVF_ERR_TIMEOUT: + return "IAVF_ERR_TIMEOUT"; + case IAVF_ERR_OPCODE_MISMATCH: + return "IAVF_ERR_OPCODE_MISMATCH"; + case IAVF_ERR_CQP_COMPL_ERROR: + return "IAVF_ERR_CQP_COMPL_ERROR"; + case IAVF_ERR_INVALID_VF_ID: + return "IAVF_ERR_INVALID_VF_ID"; + case IAVF_ERR_INVALID_HMCFN_ID: + return "IAVF_ERR_INVALID_HMCFN_ID"; + case IAVF_ERR_BACKING_PAGE_ERROR: + return "IAVF_ERR_BACKING_PAGE_ERROR"; + case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE: + return "IAVF_ERR_NO_PBLCHUNKS_AVAILABLE"; + case IAVF_ERR_INVALID_PBLE_INDEX: + return "IAVF_ERR_INVALID_PBLE_INDEX"; + case IAVF_ERR_INVALID_SD_INDEX: + return "IAVF_ERR_INVALID_SD_INDEX"; + case IAVF_ERR_INVALID_PAGE_DESC_INDEX: + return "IAVF_ERR_INVALID_PAGE_DESC_INDEX"; + case IAVF_ERR_INVALID_SD_TYPE: + return "IAVF_ERR_INVALID_SD_TYPE"; + case IAVF_ERR_MEMCPY_FAILED: + return "IAVF_ERR_MEMCPY_FAILED"; + case IAVF_ERR_INVALID_HMC_OBJ_INDEX: + return "IAVF_ERR_INVALID_HMC_OBJ_INDEX"; + case IAVF_ERR_INVALID_HMC_OBJ_COUNT: + return "IAVF_ERR_INVALID_HMC_OBJ_COUNT"; + case IAVF_ERR_INVALID_SRQ_ARM_LIMIT: + return "IAVF_ERR_INVALID_SRQ_ARM_LIMIT"; + case IAVF_ERR_SRQ_ENABLED: + return "IAVF_ERR_SRQ_ENABLED"; + case IAVF_ERR_ADMIN_QUEUE_ERROR: + return "IAVF_ERR_ADMIN_QUEUE_ERROR"; + case IAVF_ERR_ADMIN_QUEUE_TIMEOUT: + return "IAVF_ERR_ADMIN_QUEUE_TIMEOUT"; + case IAVF_ERR_BUF_TOO_SHORT: + return "IAVF_ERR_BUF_TOO_SHORT"; + case IAVF_ERR_ADMIN_QUEUE_FULL: + return "IAVF_ERR_ADMIN_QUEUE_FULL"; + case IAVF_ERR_ADMIN_QUEUE_NO_WORK: + return "IAVF_ERR_ADMIN_QUEUE_NO_WORK"; + case IAVF_ERR_BAD_IWARP_CQE: + return "IAVF_ERR_BAD_IWARP_CQE"; + case IAVF_ERR_NVM_BLANK_MODE: + return "IAVF_ERR_NVM_BLANK_MODE"; + case IAVF_ERR_NOT_IMPLEMENTED: + return "IAVF_ERR_NOT_IMPLEMENTED"; + case IAVF_ERR_PE_DOORBELL_NOT_ENABLED: + return "IAVF_ERR_PE_DOORBELL_NOT_ENABLED"; + case IAVF_ERR_DIAG_TEST_FAILED: + return "IAVF_ERR_DIAG_TEST_FAILED"; + case IAVF_ERR_NOT_READY: + return "IAVF_ERR_NOT_READY"; + case IAVF_NOT_SUPPORTED: + return "IAVF_NOT_SUPPORTED"; + case IAVF_ERR_FIRMWARE_API_VERSION: + return "IAVF_ERR_FIRMWARE_API_VERSION"; + case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR: + return "IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR"; } snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err); @@ -260,7 +260,7 @@ const char *iavf_stat_str(struct iavf_hw *hw, iavf_status stat_err) void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask, void *desc, void *buffer, u16 buf_len) { - struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; + struct iavf_aq_desc *aq_desc = (struct iavf_aq_desc *)desc; u8 *buf = (u8 *)buffer; if ((!(mask & hw->debug_mask)) || !desc) @@ -327,17 +327,17 @@ bool iavf_check_asq_alive(struct iavf_hw *hw) * Tell the Firmware that we're shutting down the AdminQ and whether * or not the driver is unloading as well. 
**/ -iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading) +enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading) { - struct i40e_aq_desc desc; - struct i40e_aqc_queue_shutdown *cmd = - (struct i40e_aqc_queue_shutdown *)&desc.params.raw; - iavf_status status; + struct iavf_aq_desc desc; + struct iavf_aqc_queue_shutdown *cmd = + (struct iavf_aqc_queue_shutdown *)&desc.params.raw; + enum iavf_status status; - iavf_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_queue_shutdown); + iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_queue_shutdown); if (unloading) - cmd->driver_unloading = cpu_to_le32(I40E_AQ_DRIVER_UNLOADING); + cmd->driver_unloading = cpu_to_le32(IAVF_AQ_DRIVER_UNLOADING); status = iavf_asq_send_command(hw, &desc, NULL, 0, NULL); return status; @@ -354,43 +354,43 @@ iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading) * * Internal function to get or set RSS look up table **/ -static iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, - u16 vsi_id, bool pf_lut, - u8 *lut, u16 lut_size, - bool set) +static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, + u16 vsi_id, bool pf_lut, + u8 *lut, u16 lut_size, + bool set) { - iavf_status status; - struct i40e_aq_desc desc; - struct i40e_aqc_get_set_rss_lut *cmd_resp = - (struct i40e_aqc_get_set_rss_lut *)&desc.params.raw; + enum iavf_status status; + struct iavf_aq_desc desc; + struct iavf_aqc_get_set_rss_lut *cmd_resp = + (struct iavf_aqc_get_set_rss_lut *)&desc.params.raw; if (set) iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_set_rss_lut); + iavf_aqc_opc_set_rss_lut); else iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_get_rss_lut); + iavf_aqc_opc_get_rss_lut); /* Indirect command */ - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD); cmd_resp->vsi_id = cpu_to_le16((u16)((vsi_id << - I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) & - I40E_AQC_SET_RSS_LUT_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_LUT_VSI_VALID); + IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT) & + IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK)); + cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_LUT_VSI_VALID); if (pf_lut) cmd_resp->flags |= cpu_to_le16((u16) - ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF << - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + ((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF << + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); else cmd_resp->flags |= cpu_to_le16((u16) - ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI << - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + ((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI << + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); status = iavf_asq_send_command(hw, &desc, lut, lut_size, NULL); @@ -407,8 +407,8 @@ static iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, * * get the RSS lookup table, PF or VSI type **/ -iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, - bool pf_lut, u8 *lut, u16 lut_size) +enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, + bool pf_lut, u8 *lut, u16 lut_size) { return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, false); @@ -424,8 +424,8 @@ iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, * * set the RSS lookup table, PF or VSI type **/ -iavf_status 
iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id, - bool pf_lut, u8 *lut, u16 lut_size) +enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id, + bool pf_lut, u8 *lut, u16 lut_size) { return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, true); } @@ -439,33 +439,33 @@ iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id, * * get the RSS key per VSI **/ -static +static enum iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct i40e_aqc_get_set_rss_key_data *key, + struct iavf_aqc_get_set_rss_key_data *key, bool set) { - iavf_status status; - struct i40e_aq_desc desc; - struct i40e_aqc_get_set_rss_key *cmd_resp = - (struct i40e_aqc_get_set_rss_key *)&desc.params.raw; - u16 key_size = sizeof(struct i40e_aqc_get_set_rss_key_data); + enum iavf_status status; + struct iavf_aq_desc desc; + struct iavf_aqc_get_set_rss_key *cmd_resp = + (struct iavf_aqc_get_set_rss_key *)&desc.params.raw; + u16 key_size = sizeof(struct iavf_aqc_get_set_rss_key_data); if (set) iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_set_rss_key); + iavf_aqc_opc_set_rss_key); else iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_get_rss_key); + iavf_aqc_opc_get_rss_key); /* Indirect command */ - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD); cmd_resp->vsi_id = cpu_to_le16((u16)((vsi_id << - I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) & - I40E_AQC_SET_RSS_KEY_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_KEY_VSI_VALID); + IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT) & + IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK)); + cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_KEY_VSI_VALID); status = iavf_asq_send_command(hw, &desc, key, key_size, NULL); @@ -479,8 +479,8 @@ iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, * @key: pointer to key info struct * **/ -iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct i40e_aqc_get_set_rss_key_data *key) +enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, + struct iavf_aqc_get_set_rss_key_data *key) { return iavf_aq_get_set_rss_key(hw, vsi_id, key, false); } @@ -493,8 +493,8 @@ iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, * * set the RSS key per VSI **/ -iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct i40e_aqc_get_set_rss_key_data *key) +enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, + struct iavf_aqc_get_set_rss_key_data *key) { return iavf_aq_get_set_rss_key(hw, vsi_id, key, true); } @@ -515,7 +515,7 @@ iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, * IF NOT iavf_ptype_lookup[ptype].known * THEN * Packet is unknown - * ELSE IF iavf_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP + * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP * Use the rest of the fields to look at the tunnels, inner protocols, etc * ELSE * Use the enum iavf_rx_l2_ptype to decode the packet type @@ -877,24 +877,25 @@ struct iavf_rx_ptype_decoded iavf_ptype_lookup[] = { * is sent asynchronously, i.e. iavf_asq_send_command() does not wait for * completion before returning. 
**/ -iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, - enum virtchnl_ops v_opcode, - iavf_status v_retval, u8 *msg, u16 msglen, - struct i40e_asq_cmd_details *cmd_details) +enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, + enum virtchnl_ops v_opcode, + enum iavf_status v_retval, + u8 *msg, u16 msglen, + struct iavf_asq_cmd_details *cmd_details) { - struct i40e_asq_cmd_details details; - struct i40e_aq_desc desc; - iavf_status status; + struct iavf_asq_cmd_details details; + struct iavf_aq_desc desc; + enum iavf_status status; - iavf_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_pf); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_SI); + iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_send_msg_to_pf); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_SI); desc.cookie_high = cpu_to_le32(v_opcode); desc.cookie_low = cpu_to_le32(v_retval); if (msglen) { - desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF - | I40E_AQ_FLAG_RD)); - if (msglen > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc.flags |= cpu_to_le16((u16)(IAVF_AQ_FLAG_BUF + | IAVF_AQ_FLAG_RD)); + if (msglen > IAVF_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_LB); desc.datalen = cpu_to_le16(msglen); } if (!cmd_details) { @@ -948,7 +949,7 @@ void iavf_vf_parse_hw_config(struct iavf_hw *hw, * as none will be forthcoming. Immediately after calling this function, * the admin queue should be shut down and (optionally) reinitialized. **/ -iavf_status iavf_vf_reset(struct iavf_hw *hw) +enum iavf_status iavf_vf_reset(struct iavf_hw *hw) { return iavf_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF, 0, NULL, 0, NULL); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 9f87304109fe..dad3eec8ccd8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -280,10 +280,10 @@ static int iavf_get_link_ksettings(struct net_device *netdev, cmd->base.port = PORT_NONE; /* Set speed and duplex */ switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: cmd->base.speed = SPEED_40000; break; - case I40E_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_25GB: #ifdef SPEED_25000 cmd->base.speed = SPEED_25000; #else @@ -291,16 +291,16 @@ static int iavf_get_link_ksettings(struct net_device *netdev, "Speed is 25G, display not supported by this version of ethtool.\n"); #endif break; - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_20GB: cmd->base.speed = SPEED_20000; break; - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: cmd->base.speed = SPEED_10000; break; - case I40E_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_1GB: cmd->base.speed = SPEED_1000; break; - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_100MB: cmd->base.speed = SPEED_100; break; default: @@ -510,7 +510,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) if (changed_flags & IAVF_FLAG_LEGACY_RX) { if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } } @@ -622,7 +622,7 @@ static int iavf_set_ringparam(struct net_device *netdev, if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } return 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4569d69a2b55..9d2b50964a08 100644 --- 
a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -14,6 +14,8 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter); static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter); static int iavf_close(struct net_device *netdev); +static int iavf_init_get_resources(struct iavf_adapter *adapter); +static int iavf_check_reset_complete(struct iavf_hw *hw); char iavf_driver_name[] = "iavf"; static const char iavf_driver_string[] = @@ -57,7 +59,8 @@ MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver") MODULE_LICENSE("GPL v2"); MODULE_VERSION(DRV_VERSION); -static struct workqueue_struct *iavf_wq; +static const struct net_device_ops iavf_netdev_ops; +struct workqueue_struct *iavf_wq; /** * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code @@ -66,14 +69,14 @@ static struct workqueue_struct *iavf_wq; * @size: size of memory requested * @alignment: what to align the allocation to **/ -iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, - struct iavf_dma_mem *mem, - u64 size, u32 alignment) +enum iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, + struct iavf_dma_mem *mem, + u64 size, u32 alignment) { struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; if (!mem) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; mem->size = ALIGN(size, alignment); mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size, @@ -81,7 +84,7 @@ iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, if (mem->va) return 0; else - return I40E_ERR_NO_MEMORY; + return IAVF_ERR_NO_MEMORY; } /** @@ -89,12 +92,13 @@ iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, struct iavf_dma_mem *mem) +enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, + struct iavf_dma_mem *mem) { struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; if (!mem || !mem->va) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, (dma_addr_t)mem->pa); return 0; @@ -106,11 +110,11 @@ iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, struct iavf_dma_mem *mem) * @mem: ptr to mem struct to fill out * @size: size of memory requested **/ -iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem, u32 size) +enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, + struct iavf_virt_mem *mem, u32 size) { if (!mem) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; mem->size = size; mem->va = kzalloc(size, GFP_KERNEL); @@ -118,7 +122,7 @@ iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, if (mem->va) return 0; else - return I40E_ERR_NO_MEMORY; + return IAVF_ERR_NO_MEMORY; } /** @@ -126,10 +130,11 @@ iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, struct iavf_virt_mem *mem) +enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, + struct iavf_virt_mem *mem) { if (!mem) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; /* it's ok to kfree a NULL pointer */ kfree(mem->va); @@ -168,7 +173,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) if (!(adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } } @@ 
-287,7 +292,7 @@ static irqreturn_t iavf_msix_aq(int irq, void *data) rd32(hw, IAVF_VFINT_ICR0_ENA1); /* schedule work on the private workqueue */ - schedule_work(&adapter->adminq_task); + queue_work(iavf_wq, &adapter->adminq_task); return IRQ_HANDLED; } @@ -657,14 +662,13 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, u16 vlan) f = iavf_find_vlan(adapter, vlan); if (!f) { - f = kzalloc(sizeof(*f), GFP_KERNEL); + f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) goto clearout; f->vlan = vlan; - INIT_LIST_HEAD(&f->list); - list_add(&f->list, &adapter->vlan_filter_list); + list_add_tail(&f->list, &adapter->vlan_filter_list); f->add = true; adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; } @@ -979,7 +983,7 @@ static void iavf_up_complete(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES; if (CLIENT_ENABLED(adapter)) adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN; - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); } /** @@ -1043,7 +1047,7 @@ void iavf_down(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; } - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); } /** @@ -1227,8 +1231,8 @@ out: **/ static int iavf_config_rss_aq(struct iavf_adapter *adapter) { - struct i40e_aqc_get_set_rss_key_data *rss_key = - (struct i40e_aqc_get_set_rss_key_data *)adapter->rss_key; + struct iavf_aqc_get_set_rss_key_data *rss_key = + (struct iavf_aqc_get_set_rss_key_data *)adapter->rss_key; struct iavf_hw *hw = &adapter->hw; int ret = 0; @@ -1532,136 +1536,66 @@ err: } /** - * iavf_watchdog_timer - Periodic call-back timer - * @data: pointer to adapter disguised as unsigned long - **/ -static void iavf_watchdog_timer(struct timer_list *t) -{ - struct iavf_adapter *adapter = from_timer(adapter, t, - watchdog_timer); - - schedule_work(&adapter->watchdog_task); - /* timer will be rescheduled in watchdog task */ -} - -/** - * iavf_watchdog_task - Periodic call-back task - * @work: pointer to work_struct + * iavf_process_aq_command - process aq_required flags + * and sends aq command + * @adapter: pointer to iavf adapter structure + * + * Returns 0 on success + * Returns error code if no command was sent + * or error code if the command failed. **/ -static void iavf_watchdog_task(struct work_struct *work) +static int iavf_process_aq_command(struct iavf_adapter *adapter) { - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - watchdog_task); - struct iavf_hw *hw = &adapter->hw; - u32 reg_val; - - if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section)) - goto restart_watchdog; - - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) { - reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & - IAVF_VFGEN_RSTAT_VFR_STATE_MASK; - if ((reg_val == VIRTCHNL_VFR_VFACTIVE) || - (reg_val == VIRTCHNL_VFR_COMPLETED)) { - /* A chance for redemption! */ - dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); - adapter->state = __IAVF_STARTUP; - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - schedule_delayed_work(&adapter->init_task, 10); - clear_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section); - /* Don't reschedule the watchdog, since we've restarted - * the init task. When init_task contacts the PF and - * gets everything set up again, it'll restart the - * watchdog for us. Down, boy. Sit. Stay. Woof. 
- */ - return; - } - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - goto watchdog_done; - } - - if ((adapter->state < __IAVF_DOWN) || - (adapter->flags & IAVF_FLAG_RESET_PENDING)) - goto watchdog_done; - - /* check for reset */ - reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; - if (!(adapter->flags & IAVF_FLAG_RESET_PENDING) && !reg_val) { - adapter->state = __IAVF_RESETTING; - adapter->flags |= IAVF_FLAG_RESET_PENDING; - dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); - schedule_work(&adapter->reset_task); - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - goto watchdog_done; - } - - /* Process admin queue tasks. After init, everything gets done - * here so we don't race on the admin queue. - */ - if (adapter->current_op) { - if (!iavf_asq_done(hw)) { - dev_dbg(&adapter->pdev->dev, "Admin queue timeout\n"); - iavf_send_api_ver(adapter); - } - goto watchdog_done; - } - if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG) { - iavf_send_vf_config_msg(adapter); - goto watchdog_done; - } - + if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG) + return iavf_send_vf_config_msg(adapter); if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) { iavf_disable_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) { iavf_map_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ADD_MAC_FILTER) { iavf_add_ether_addrs(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ADD_VLAN_FILTER) { iavf_add_vlans(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_DEL_MAC_FILTER) { iavf_del_ether_addrs(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_DEL_VLAN_FILTER) { iavf_del_vlans(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) { iavf_enable_vlan_stripping(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) { iavf_disable_vlan_stripping(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) { iavf_configure_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) { iavf_enable_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_RSS) { @@ -1669,81 +1603,414 @@ static void iavf_watchdog_task(struct work_struct *work) * PF, so we don't have to set current_op as we will * not get a response through the ARQ. 
 */
-		iavf_init_rss(adapter);
 		adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_RSS;
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_GET_HENA) {
 		iavf_get_hena(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_SET_HENA) {
 		iavf_set_hena(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_KEY) {
 		iavf_set_rss_key(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_LUT) {
 		iavf_set_rss_lut(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) {
 		iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC |
 				     FLAG_VF_MULTICAST_PROMISC);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) {
 		iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
-		goto watchdog_done;
+		return 0;
 	}
 	if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) &&
 	    (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) {
 		iavf_set_promiscuous(adapter, 0);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CHANNELS) {
 		iavf_enable_channels(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CHANNELS) {
 		iavf_disable_channels(adapter);
-		goto watchdog_done;
+		return 0;
 	}
-
 	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) {
 		iavf_add_cloud_filter(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) {
 		iavf_del_cloud_filter(adapter);
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+/**
+ * iavf_startup - first step of driver startup
+ * @adapter: board private structure
+ *
+ * Function processes the __IAVF_STARTUP driver state.
+ * On success the state is changed to __IAVF_INIT_VERSION_CHECK;
+ * on failure it returns -EAGAIN.
+ **/
+static int iavf_startup(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err;
+
+	WARN_ON(adapter->state != __IAVF_STARTUP);
+
+	/* driver loaded, probe complete */
+	adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+	err = iavf_set_mac_type(hw);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", err);
+		goto err;
+	}
+
+	err = iavf_check_reset_complete(hw);
+	if (err) {
+		dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n",
+			 err);
+		goto err;
+	}
+	hw->aq.num_arq_entries = IAVF_AQ_LEN;
+	hw->aq.num_asq_entries = IAVF_AQ_LEN;
+	hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+	hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+
+	err = iavf_init_adminq(hw);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", err);
+		goto err;
+	}
+	err = iavf_send_api_ver(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err);
+		iavf_shutdown_adminq(hw);
+		goto err;
+	}
+	adapter->state = __IAVF_INIT_VERSION_CHECK;
+err:
+	return err;
+}
+
+/**
+ * iavf_init_version_check - second step of driver startup
+ * @adapter: board private structure
+ *
+ * Function processes the __IAVF_INIT_VERSION_CHECK driver state.
+ * On success the state is changed to __IAVF_INIT_GET_RESOURCES;
+ * on failure it returns -EAGAIN.
+ **/
+static int iavf_init_version_check(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err = -EAGAIN;
+
+	WARN_ON(adapter->state != __IAVF_INIT_VERSION_CHECK);
+
+	if (!iavf_asq_done(hw)) {
+		dev_err(&pdev->dev, "Admin queue command never completed\n");
+		iavf_shutdown_adminq(hw);
+		adapter->state = __IAVF_STARTUP;
+		goto err;
+	}
+
+	/* aq msg sent, awaiting reply */
+	err = iavf_verify_api_ver(adapter);
+	if (err) {
+		if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK)
+			err = iavf_send_api_ver(adapter);
+		else
+			dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n",
+				adapter->pf_version.major,
+				adapter->pf_version.minor,
+				VIRTCHNL_VERSION_MAJOR,
+				VIRTCHNL_VERSION_MINOR);
+		goto err;
+	}
+	err = iavf_send_vf_config_msg(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to send config request (%d)\n",
+			err);
+		goto err;
+	}
+	adapter->state = __IAVF_INIT_GET_RESOURCES;
+
+err:
+	return err;
+}
+
+/**
+ * iavf_init_get_resources - third step of driver startup
+ * @adapter: board private structure
+ *
+ * Function processes the __IAVF_INIT_GET_RESOURCES driver state and
+ * finishes the driver initialization procedure.
+ * On success the state is changed to __IAVF_DOWN;
+ * on failure it returns -EAGAIN.
+ **/
+static int iavf_init_get_resources(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err = 0, bufsz;
+
+	WARN_ON(adapter->state != __IAVF_INIT_GET_RESOURCES);
+	/* aq msg sent, awaiting reply */
+	if (!adapter->vf_res) {
+		bufsz = sizeof(struct virtchnl_vf_resource) +
+			(IAVF_MAX_VF_VSI *
+			 sizeof(struct virtchnl_vsi_resource));
+		adapter->vf_res = kzalloc(bufsz, GFP_KERNEL);
+		if (!adapter->vf_res)
+			goto err;
+	}
+	err = iavf_get_vf_config(adapter);
+	if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) {
+		err = iavf_send_vf_config_msg(adapter);
+		goto err;
+	} else if (err == IAVF_ERR_PARAM) {
+		/* We only get ERR_PARAM if the device is in a very bad
+		 * state or if we've been disabled for previous bad
+		 * behavior. Either way, we're done now.
+		 */
+		iavf_shutdown_adminq(hw);
+		dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n");
+		return 0;
+	}
+	if (err) {
+		dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err);
+		goto err_alloc;
+	}
+
+	if (iavf_process_config(adapter))
+		goto err_alloc;
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+
+	adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED;
+
+	netdev->netdev_ops = &iavf_netdev_ops;
+	iavf_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+
+	/* MTU range: 68 - 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD;
+
+	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
+		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
+			 adapter->hw.mac.addr);
+		eth_hw_addr_random(netdev);
+		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+	} else {
+		adapter->flags |= IAVF_FLAG_ADDR_SET_BY_PF;
+		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
+	}
+
+	adapter->tx_desc_count = IAVF_DEFAULT_TXD;
+	adapter->rx_desc_count = IAVF_DEFAULT_RXD;
+	err = iavf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err_sw_init;
+	iavf_map_rings_to_vectors(adapter);
+	if (adapter->vf_res->vf_cap_flags &
+	    VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE;
+
+	err = iavf_request_misc_irq(adapter);
+	if (err)
+		goto err_sw_init;
+
+	netif_carrier_off(netdev);
+	adapter->link_up = false;
+
+	/* set the semaphore to prevent any callbacks after device registration
+	 * until the driver state is set to __IAVF_DOWN
+	 */
+	rtnl_lock();
+	if (!adapter->netdev_registered) {
+		err = register_netdevice(netdev);
+		if (err) {
+			rtnl_unlock();
+			goto err_register;
+		}
+	}
+
+	adapter->netdev_registered = true;
+
+	netif_tx_stop_all_queues(netdev);
+	if (CLIENT_ALLOWED(adapter)) {
+		err = iavf_lan_add_device(adapter);
+		if (err) {
+			rtnl_unlock();
+			dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n",
+				 err);
+		}
+	}
+	dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
+	if (netdev->features & NETIF_F_GRO)
+		dev_info(&pdev->dev, "GRO is enabled\n");
+
+	adapter->state = __IAVF_DOWN;
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+	rtnl_unlock();
+
+	iavf_misc_irq_enable(adapter);
+	wake_up(&adapter->down_waitqueue);
+
+	adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL);
+	adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL);
+	if (!adapter->rss_key || !adapter->rss_lut)
+		goto err_mem;
+	if (RSS_AQ(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+	else
+		iavf_init_rss(adapter);
+
+	return err;
+err_mem:
+	iavf_free_rss(adapter);
+err_register:
+	iavf_free_misc_irq(adapter);
+err_sw_init:
+	iavf_reset_interrupt_capability(adapter);
+err_alloc:
+	kfree(adapter->vf_res);
+	adapter->vf_res = NULL;
+err:
+	return err;
+}
+
+/**
+ * iavf_watchdog_task - Periodic call-back task
+ * @work: pointer to work_struct
+ **/
+static void iavf_watchdog_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						    struct iavf_adapter,
+						    watchdog_task.work);
+	struct iavf_hw *hw = &adapter->hw;
+	u32 reg_val;
+
+	if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section))
+		goto restart_watchdog;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+		adapter->state = __IAVF_COMM_FAILED;
+
+	switch (adapter->state) {
+	case __IAVF_COMM_FAILED:
+		reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+			  IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if (reg_val == VIRTCHNL_VFR_VFACTIVE ||
+		    reg_val == VIRTCHNL_VFR_COMPLETED) {
+			/* A chance for redemption! */
+			dev_err(&adapter->pdev->dev,
+				"Hardware came out of reset. Attempting reinit.\n");
+			adapter->state = __IAVF_STARTUP;
+			adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+			queue_delayed_work(iavf_wq, &adapter->init_task, 10);
+			clear_bit(__IAVF_IN_CRITICAL_TASK,
+				  &adapter->crit_section);
+			/* Don't reschedule the watchdog, since we've restarted
+			 * the init task. When init_task contacts the PF and
+			 * gets everything set up again, it'll restart the
+			 * watchdog for us. Down, boy. Sit. Stay. Woof.
+			 */
+			return;
+		}
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		clear_bit(__IAVF_IN_CRITICAL_TASK,
+			  &adapter->crit_section);
+		queue_delayed_work(iavf_wq,
+				   &adapter->watchdog_task,
+				   msecs_to_jiffies(10));
 		goto watchdog_done;
+	case __IAVF_RESETTING:
+		clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+		return;
+	case __IAVF_DOWN:
+	case __IAVF_DOWN_PENDING:
+	case __IAVF_TESTING:
+	case __IAVF_RUNNING:
+		if (adapter->current_op) {
+			if (!iavf_asq_done(hw)) {
+				dev_dbg(&adapter->pdev->dev,
+					"Admin queue timeout\n");
+				iavf_send_api_ver(adapter);
+			}
+		} else {
+			if (!iavf_process_aq_command(adapter) &&
+			    adapter->state == __IAVF_RUNNING)
+				iavf_request_stats(adapter);
+		}
+		break;
+	case __IAVF_REMOVE:
+		clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+		return;
+	default:
+		goto restart_watchdog;
 	}

-	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
+	/* check for hw reset */
+	reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+	if (!reg_val) {
+		adapter->state = __IAVF_RESETTING;
+		adapter->flags |= IAVF_FLAG_RESET_PENDING;
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+		queue_work(iavf_wq, &adapter->reset_task);
+		goto watchdog_done;
+	}

-	if (adapter->state == __IAVF_RUNNING)
-		iavf_request_stats(adapter);
+	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
 watchdog_done:
-	if (adapter->state == __IAVF_RUNNING)
+	if (adapter->state == __IAVF_RUNNING ||
+	    adapter->state == __IAVF_COMM_FAILED)
 		iavf_detect_recover_hung(&adapter->vsi);
 	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 restart_watchdog:
-	if (adapter->state == __IAVF_REMOVE)
-		return;
 	if (adapter->aq_required)
-		mod_timer(&adapter->watchdog_timer,
-			  jiffies + msecs_to_jiffies(20));
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(20));
 	else
-		mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2));
-	schedule_work(&adapter->adminq_task);
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+	queue_work(iavf_wq, &adapter->adminq_task);
 }

 static void iavf_disable_vf(struct iavf_adapter *adapter)
@@ -1967,7 +2234,7 @@ continue_reset:
 		adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;

 	iavf_misc_irq_enable(adapter);
-	mod_timer(&adapter->watchdog_timer, jiffies + 2);
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);

 	/* We were running when the reset started, so we need to restore some
 	 * state here.
 	 */
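The hunks above all apply one conversion: the legacy watchdog_timer plus work_struct pair becomes a single delayed work item queued on the driver's private workqueue, and the handler re-arms itself. A minimal sketch of that pattern, using hypothetical foo_* names rather than the actual iavf code:

#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct foo_adapter {
	struct delayed_work watchdog;	/* replaces timer + work_struct */
};

static struct workqueue_struct *foo_wq;

static void foo_watchdog_task(struct work_struct *work)
{
	struct foo_adapter *ad = container_of(work, struct foo_adapter,
					      watchdog.work);

	/* ... poll hardware/driver state for 'ad' here ... */

	/* Re-arm: the handler reschedules itself, so no timer is needed. */
	queue_delayed_work(foo_wq, &ad->watchdog, msecs_to_jiffies(2000));
}

int foo_start(struct foo_adapter *ad)
{
	/* WQ_MEM_RECLAIM guarantees forward progress under memory
	 * pressure, which matters for work a reset path may flush.
	 */
	foo_wq = alloc_workqueue("foo", WQ_MEM_RECLAIM, 0);
	if (!foo_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&ad->watchdog, foo_watchdog_task);
	queue_delayed_work(foo_wq, &ad->watchdog, 0);
	return 0;
}

void foo_stop(struct foo_adapter *ad)
{
	/* One call replaces del_timer_sync() + cancel_work_sync(). */
	cancel_delayed_work_sync(&ad->watchdog);
	destroy_workqueue(foo_wq);
}

With this shape, teardown collapses to a single cancel_delayed_work_sync(), which is exactly what the iavf_remove() hunk later in this patch switches to.
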
@@ -2020,9 +2287,9 @@ static void iavf_adminq_task(struct work_struct *work) struct iavf_adapter *adapter = container_of(work, struct iavf_adapter, adminq_task); struct iavf_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; + struct iavf_arq_event_info event; enum virtchnl_ops v_op; - iavf_status ret, v_ret; + enum iavf_status ret, v_ret; u32 val, oldval; u16 pending; @@ -2037,7 +2304,7 @@ static void iavf_adminq_task(struct work_struct *work) do { ret = iavf_clean_arq_element(hw, &event, &pending); v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - v_ret = (iavf_status)le32_to_cpu(event.desc.cookie_low); + v_ret = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); if (ret || !v_op) break; /* No event to process or error cleaning ARQ */ @@ -2239,22 +2506,22 @@ static int iavf_validate_tx_bandwidth(struct iavf_adapter *adapter, int speed = 0, ret = 0; switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: speed = 40000; break; - case I40E_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_25GB: speed = 25000; break; - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_20GB: speed = 20000; break; - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: speed = 10000; break; - case I40E_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_1GB: speed = 1000; break; - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_100MB: speed = 100; break; default: @@ -2432,14 +2699,14 @@ exit: /** * iavf_parse_cls_flower - Parse tc flower filters provided by kernel * @adapter: board private structure - * @cls_flower: pointer to struct tc_cls_flower_offload + * @cls_flower: pointer to struct flow_cls_offload * @filter: pointer to cloud filter structure */ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct iavf_cloud_filter *filter) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0; u16 n_proto_key = 0; @@ -2508,7 +2775,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n", match.mask->dst); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2518,7 +2785,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n", match.mask->src); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2553,7 +2820,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n", match.mask->vlan_id); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); @@ -2577,7 +2844,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n", be32_to_cpu(match.mask->dst)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2587,13 +2854,13 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", be32_to_cpu(match.mask->dst)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } if (field_flags & IAVF_CLOUD_FIELD_TEN_ID) { dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n"); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } if (match.key->dst) { vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff); @@ -2614,7 
+2881,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, if (ipv6_addr_any(&match.mask->dst)) { dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n", IPV6_ADDR_ANY); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } /* src and dest IPv6 address should not be LOOPBACK @@ -2624,7 +2891,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, ipv6_addr_loopback(&match.key->src)) { dev_err(&adapter->pdev->dev, "ipv6 addr should not be loopback\n"); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } if (!ipv6_addr_any(&match.mask->dst) || !ipv6_addr_any(&match.mask->src)) @@ -2649,7 +2916,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", be16_to_cpu(match.mask->src)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2659,7 +2926,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", be16_to_cpu(match.mask->dst)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } if (match.key->dst) { @@ -2704,10 +2971,10 @@ static int iavf_handle_tclass(struct iavf_adapter *adapter, u32 tc, /** * iavf_configure_clsflower - Add tc flower filters * @adapter: board private structure - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload */ static int iavf_configure_clsflower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); struct iavf_cloud_filter *filter = NULL; @@ -2783,10 +3050,10 @@ static struct iavf_cloud_filter *iavf_find_cf(struct iavf_adapter *adapter, /** * iavf_delete_clsflower - Remove tc flower filters * @adapter: board private structure - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload */ static int iavf_delete_clsflower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct iavf_cloud_filter *filter = NULL; int err = 0; @@ -2810,17 +3077,17 @@ static int iavf_delete_clsflower(struct iavf_adapter *adapter, * @type_data: offload data */ static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return iavf_configure_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return iavf_delete_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -2846,34 +3113,7 @@ static int iavf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -/** - * iavf_setup_tc_block - register callbacks for tc - * @netdev: network interface device structure - * @f: tc offload data - * - * This function registers block callbacks for tc - * offloads - **/ -static int iavf_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct iavf_adapter *adapter = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, iavf_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - 
tcf_block_cb_unregister(f->block, iavf_setup_tc_block_cb, - adapter); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(iavf_block_cb_list); /** * iavf_setup_tc - configure multiple traffic classes @@ -2889,11 +3129,16 @@ static int iavf_setup_tc_block(struct net_device *dev, static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { + struct iavf_adapter *adapter = netdev_priv(netdev); + switch (type) { case TC_SETUP_QDISC_MQPRIO: return __iavf_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: - return iavf_setup_tc_block(netdev, type_data); + return flow_block_cb_setup_simple(type_data, + &iavf_block_cb_list, + iavf_setup_tc_block_cb, + adapter, adapter, true); default: return -EOPNOTSUPP; } @@ -2908,7 +3153,7 @@ static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, * The open entry point is called when a network interface is made * active by the system (IFF_UP). At this point all resources needed * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, + * handler is registered with the OS, the watchdog is started, * and the stack is notified that the interface is ready. **/ static int iavf_open(struct net_device *netdev) @@ -3020,7 +3265,7 @@ static int iavf_close(struct net_device *netdev) status = wait_event_timeout(adapter->down_waitqueue, adapter->state == __IAVF_DOWN, - msecs_to_jiffies(200)); + msecs_to_jiffies(500)); if (!status) netdev_warn(netdev, "Device resources not yet released\n"); return 0; @@ -3043,7 +3288,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; } adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); return 0; } @@ -3348,217 +3593,41 @@ int iavf_process_config(struct iavf_adapter *adapter) static void iavf_init_task(struct work_struct *work) { struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - init_task.work); - struct net_device *netdev = adapter->netdev; + struct iavf_adapter, + init_task.work); struct iavf_hw *hw = &adapter->hw; - struct pci_dev *pdev = adapter->pdev; - int err, bufsz; switch (adapter->state) { case __IAVF_STARTUP: - /* driver loaded, probe complete */ - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - err = iavf_set_mac_type(hw); - if (err) { - dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", - err); - goto err; - } - err = iavf_check_reset_complete(hw); - if (err) { - dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n", - err); - goto err; - } - hw->aq.num_arq_entries = IAVF_AQ_LEN; - hw->aq.num_asq_entries = IAVF_AQ_LEN; - hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - - err = iavf_init_adminq(hw); - if (err) { - dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", - err); - goto err; - } - err = iavf_send_api_ver(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err); - iavf_shutdown_adminq(hw); - goto err; - } - adapter->state = __IAVF_INIT_VERSION_CHECK; - goto restart; + if (iavf_startup(adapter) < 0) + goto init_failed; + break; case __IAVF_INIT_VERSION_CHECK: - if (!iavf_asq_done(hw)) { - dev_err(&pdev->dev, "Admin queue command never completed\n"); - iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; - goto err; - } - - /* aq msg sent, awaiting reply */ - err = 
iavf_verify_api_ver(adapter); - if (err) { - if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) - err = iavf_send_api_ver(adapter); - else - dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n", - adapter->pf_version.major, - adapter->pf_version.minor, - VIRTCHNL_VERSION_MAJOR, - VIRTCHNL_VERSION_MINOR); - goto err; - } - err = iavf_send_vf_config_msg(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send config request (%d)\n", - err); - goto err; - } - adapter->state = __IAVF_INIT_GET_RESOURCES; - goto restart; - case __IAVF_INIT_GET_RESOURCES: - /* aq msg sent, awaiting reply */ - if (!adapter->vf_res) { - bufsz = sizeof(struct virtchnl_vf_resource) + - (IAVF_MAX_VF_VSI * - sizeof(struct virtchnl_vsi_resource)); - adapter->vf_res = kzalloc(bufsz, GFP_KERNEL); - if (!adapter->vf_res) - goto err; - } - err = iavf_get_vf_config(adapter); - if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { - err = iavf_send_vf_config_msg(adapter); - goto err; - } else if (err == I40E_ERR_PARAM) { - /* We only get ERR_PARAM if the device is in a very bad - * state or if we've been disabled for previous bad - * behavior. Either way, we're done now. - */ - iavf_shutdown_adminq(hw); - dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); - return; - } - if (err) { - dev_err(&pdev->dev, "Unable to get VF config (%d)\n", - err); - goto err_alloc; - } - adapter->state = __IAVF_INIT_SW; + if (iavf_init_version_check(adapter) < 0) + goto init_failed; break; + case __IAVF_INIT_GET_RESOURCES: + if (iavf_init_get_resources(adapter) < 0) + goto init_failed; + return; default: - goto err_alloc; - } - - if (iavf_process_config(adapter)) - goto err_alloc; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - - adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED; - - netdev->netdev_ops = &iavf_netdev_ops; - iavf_set_ethtool_ops(netdev); - netdev->watchdog_timeo = 5 * HZ; - - /* MTU range: 68 - 9710 */ - netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD; - - if (!is_valid_ether_addr(adapter->hw.mac.addr)) { - dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", - adapter->hw.mac.addr); - eth_hw_addr_random(netdev); - ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); - } else { - adapter->flags |= IAVF_FLAG_ADDR_SET_BY_PF; - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); - } - - timer_setup(&adapter->watchdog_timer, iavf_watchdog_timer, 0); - mod_timer(&adapter->watchdog_timer, jiffies + 1); - - adapter->tx_desc_count = IAVF_DEFAULT_TXD; - adapter->rx_desc_count = IAVF_DEFAULT_RXD; - err = iavf_init_interrupt_scheme(adapter); - if (err) - goto err_sw_init; - iavf_map_rings_to_vectors(adapter); - if (adapter->vf_res->vf_cap_flags & - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) - adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE; - - err = iavf_request_misc_irq(adapter); - if (err) - goto err_sw_init; - - netif_carrier_off(netdev); - adapter->link_up = false; - - if (!adapter->netdev_registered) { - err = register_netdev(netdev); - if (err) - goto err_register; - } - - adapter->netdev_registered = true; - - netif_tx_stop_all_queues(netdev); - if (CLIENT_ALLOWED(adapter)) { - err = iavf_lan_add_device(adapter); - if (err) - dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n", - err); + goto init_failed; } - dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr); - if (netdev->features & NETIF_F_GRO) - dev_info(&pdev->dev, "GRO is enabled\n"); - - 
adapter->state = __IAVF_DOWN; - set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); - iavf_misc_irq_enable(adapter); - wake_up(&adapter->down_waitqueue); - - adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL); - adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL); - if (!adapter->rss_key || !adapter->rss_lut) - goto err_mem; - - if (RSS_AQ(adapter)) { - adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); - } else { - iavf_init_rss(adapter); - } - return; -restart: - schedule_delayed_work(&adapter->init_task, msecs_to_jiffies(30)); + queue_delayed_work(iavf_wq, &adapter->init_task, + msecs_to_jiffies(30)); return; -err_mem: - iavf_free_rss(adapter); -err_register: - iavf_free_misc_irq(adapter); -err_sw_init: - iavf_reset_interrupt_capability(adapter); -err_alloc: - kfree(adapter->vf_res); - adapter->vf_res = NULL; -err: - /* Things went into the weeds, so try again later */ +init_failed: if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { - dev_err(&pdev->dev, "Failed to communicate with PF; waiting before retry\n"); + dev_err(&adapter->pdev->dev, + "Failed to communicate with PF; waiting before retry\n"); adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); adapter->state = __IAVF_STARTUP; - schedule_delayed_work(&adapter->init_task, HZ * 5); + queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); return; } - schedule_delayed_work(&adapter->init_task, HZ); + queue_delayed_work(iavf_wq, &adapter->init_task, HZ); } /** @@ -3683,11 +3752,11 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->reset_task, iavf_reset_task); INIT_WORK(&adapter->adminq_task, iavf_adminq_task); - INIT_WORK(&adapter->watchdog_task, iavf_watchdog_task); + INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task); - schedule_delayed_work(&adapter->init_task, - msecs_to_jiffies(5 * (pdev->devfn & 0x07))); + queue_delayed_work(iavf_wq, &adapter->init_task, + msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); @@ -3783,7 +3852,7 @@ static int iavf_resume(struct pci_dev *pdev) return err; } - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); netif_device_attach(netdev); @@ -3843,8 +3912,7 @@ static void iavf_remove(struct pci_dev *pdev) iavf_reset_interrupt_capability(adapter); iavf_free_q_vectors(adapter); - if (adapter->watchdog_timer.function) - del_timer_sync(&adapter->watchdog_timer); + cancel_delayed_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->adminq_task); diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h index e6e0b0328706..a452ce90679a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_osdep.h +++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h @@ -44,9 +44,12 @@ struct iavf_virt_mem { #define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s) #define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m) -#define iavf_debug(h, m, s, ...) iavf_debug_d(h, m, s, ##__VA_ARGS__) -extern void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...) - __attribute__ ((format(gnu_printf, 3, 4))); +#define iavf_debug(h, m, s, ...) 
\ +do { \ + if (((m) & (h)->debug_mask)) \ + pr_info("iavf %02x:%02x.%x " s, \ + (h)->bus.bus_id, (h)->bus.device, \ + (h)->bus.func, ##__VA_ARGS__); \ +} while (0) -typedef enum iavf_status_code iavf_status; #endif /* _IAVF_OSDEP_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index d6685103af39..edebfbbcffdc 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -16,39 +16,40 @@ */ /* adminq functions */ -iavf_status iavf_init_adminq(struct iavf_hw *hw); -iavf_status iavf_shutdown_adminq(struct iavf_hw *hw); -void i40e_adminq_init_ring_data(struct iavf_hw *hw); -iavf_status iavf_clean_arq_element(struct iavf_hw *hw, - struct i40e_arq_event_info *e, - u16 *events_pending); -iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ - u16 buff_size, - struct i40e_asq_cmd_details *cmd_details); +enum iavf_status iavf_init_adminq(struct iavf_hw *hw); +enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw); +void iavf_adminq_init_ring_data(struct iavf_hw *hw); +enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw, + struct iavf_arq_event_info *e, + u16 *events_pending); +enum iavf_status iavf_asq_send_command(struct iavf_hw *hw, + struct iavf_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct iavf_asq_cmd_details *cmd_details); bool iavf_asq_done(struct iavf_hw *hw); /* debug function for adminq */ void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask, void *desc, void *buffer, u16 buf_len); -void i40e_idle_aq(struct iavf_hw *hw); +void iavf_idle_aq(struct iavf_hw *hw); void iavf_resume_aq(struct iavf_hw *hw); bool iavf_check_asq_alive(struct iavf_hw *hw); -iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading); -const char *iavf_aq_str(struct iavf_hw *hw, enum i40e_admin_queue_err aq_err); -const char *iavf_stat_str(struct iavf_hw *hw, iavf_status stat_err); +enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading); +const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err); +const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err); -iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid, - bool pf_lut, u8 *lut, u16 lut_size); -iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, - bool pf_lut, u8 *lut, u16 lut_size); -iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid, - struct i40e_aqc_get_set_rss_key_data *key); -iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, - struct i40e_aqc_get_set_rss_key_data *key); +enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid, + bool pf_lut, u8 *lut, u16 lut_size); +enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, + bool pf_lut, u8 *lut, u16 lut_size); +enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid, + struct iavf_aqc_get_set_rss_key_data *key); +enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, + struct iavf_aqc_get_set_rss_key_data *key); -iavf_status iavf_set_mac_type(struct iavf_hw *hw); +enum iavf_status iavf_set_mac_type(struct iavf_hw *hw); extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[]; @@ -59,9 +60,10 @@ static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); -iavf_status iavf_vf_reset(struct iavf_hw *hw); -iavf_status 
iavf_aq_send_msg_to_pf(struct iavf_hw *hw, - enum virtchnl_ops v_opcode, - iavf_status v_retval, u8 *msg, u16 msglen, - struct i40e_asq_cmd_details *cmd_details); +enum iavf_status iavf_vf_reset(struct iavf_hw *hw); +enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, + enum virtchnl_ops v_opcode, + enum iavf_status v_retval, + u8 *msg, u16 msglen, + struct iavf_asq_cmd_details *cmd_details); #endif /* _IAVF_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_status.h b/drivers/net/ethernet/intel/iavf/iavf_status.h index 46742fab7b8c..46e3d1f6b604 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_status.h +++ b/drivers/net/ethernet/intel/iavf/iavf_status.h @@ -5,74 +5,74 @@ #define _IAVF_STATUS_H_ /* Error Codes */ -enum iavf_status_code { - I40E_SUCCESS = 0, - I40E_ERR_NVM = -1, - I40E_ERR_NVM_CHECKSUM = -2, - I40E_ERR_PHY = -3, - I40E_ERR_CONFIG = -4, - I40E_ERR_PARAM = -5, - I40E_ERR_MAC_TYPE = -6, - I40E_ERR_UNKNOWN_PHY = -7, - I40E_ERR_LINK_SETUP = -8, - I40E_ERR_ADAPTER_STOPPED = -9, - I40E_ERR_INVALID_MAC_ADDR = -10, - I40E_ERR_DEVICE_NOT_SUPPORTED = -11, - I40E_ERR_MASTER_REQUESTS_PENDING = -12, - I40E_ERR_INVALID_LINK_SETTINGS = -13, - I40E_ERR_AUTONEG_NOT_COMPLETE = -14, - I40E_ERR_RESET_FAILED = -15, - I40E_ERR_SWFW_SYNC = -16, - I40E_ERR_NO_AVAILABLE_VSI = -17, - I40E_ERR_NO_MEMORY = -18, - I40E_ERR_BAD_PTR = -19, - I40E_ERR_RING_FULL = -20, - I40E_ERR_INVALID_PD_ID = -21, - I40E_ERR_INVALID_QP_ID = -22, - I40E_ERR_INVALID_CQ_ID = -23, - I40E_ERR_INVALID_CEQ_ID = -24, - I40E_ERR_INVALID_AEQ_ID = -25, - I40E_ERR_INVALID_SIZE = -26, - I40E_ERR_INVALID_ARP_INDEX = -27, - I40E_ERR_INVALID_FPM_FUNC_ID = -28, - I40E_ERR_QP_INVALID_MSG_SIZE = -29, - I40E_ERR_QP_TOOMANY_WRS_POSTED = -30, - I40E_ERR_INVALID_FRAG_COUNT = -31, - I40E_ERR_QUEUE_EMPTY = -32, - I40E_ERR_INVALID_ALIGNMENT = -33, - I40E_ERR_FLUSHED_QUEUE = -34, - I40E_ERR_INVALID_PUSH_PAGE_INDEX = -35, - I40E_ERR_INVALID_IMM_DATA_SIZE = -36, - I40E_ERR_TIMEOUT = -37, - I40E_ERR_OPCODE_MISMATCH = -38, - I40E_ERR_CQP_COMPL_ERROR = -39, - I40E_ERR_INVALID_VF_ID = -40, - I40E_ERR_INVALID_HMCFN_ID = -41, - I40E_ERR_BACKING_PAGE_ERROR = -42, - I40E_ERR_NO_PBLCHUNKS_AVAILABLE = -43, - I40E_ERR_INVALID_PBLE_INDEX = -44, - I40E_ERR_INVALID_SD_INDEX = -45, - I40E_ERR_INVALID_PAGE_DESC_INDEX = -46, - I40E_ERR_INVALID_SD_TYPE = -47, - I40E_ERR_MEMCPY_FAILED = -48, - I40E_ERR_INVALID_HMC_OBJ_INDEX = -49, - I40E_ERR_INVALID_HMC_OBJ_COUNT = -50, - I40E_ERR_INVALID_SRQ_ARM_LIMIT = -51, - I40E_ERR_SRQ_ENABLED = -52, - I40E_ERR_ADMIN_QUEUE_ERROR = -53, - I40E_ERR_ADMIN_QUEUE_TIMEOUT = -54, - I40E_ERR_BUF_TOO_SHORT = -55, - I40E_ERR_ADMIN_QUEUE_FULL = -56, - I40E_ERR_ADMIN_QUEUE_NO_WORK = -57, - I40E_ERR_BAD_IWARP_CQE = -58, - I40E_ERR_NVM_BLANK_MODE = -59, - I40E_ERR_NOT_IMPLEMENTED = -60, - I40E_ERR_PE_DOORBELL_NOT_ENABLED = -61, - I40E_ERR_DIAG_TEST_FAILED = -62, - I40E_ERR_NOT_READY = -63, - I40E_NOT_SUPPORTED = -64, - I40E_ERR_FIRMWARE_API_VERSION = -65, - I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR = -66, +enum iavf_status { + IAVF_SUCCESS = 0, + IAVF_ERR_NVM = -1, + IAVF_ERR_NVM_CHECKSUM = -2, + IAVF_ERR_PHY = -3, + IAVF_ERR_CONFIG = -4, + IAVF_ERR_PARAM = -5, + IAVF_ERR_MAC_TYPE = -6, + IAVF_ERR_UNKNOWN_PHY = -7, + IAVF_ERR_LINK_SETUP = -8, + IAVF_ERR_ADAPTER_STOPPED = -9, + IAVF_ERR_INVALID_MAC_ADDR = -10, + IAVF_ERR_DEVICE_NOT_SUPPORTED = -11, + IAVF_ERR_MASTER_REQUESTS_PENDING = -12, + IAVF_ERR_INVALID_LINK_SETTINGS = -13, + IAVF_ERR_AUTONEG_NOT_COMPLETE = -14, + IAVF_ERR_RESET_FAILED = -15, + IAVF_ERR_SWFW_SYNC = -16, + 
IAVF_ERR_NO_AVAILABLE_VSI = -17, + IAVF_ERR_NO_MEMORY = -18, + IAVF_ERR_BAD_PTR = -19, + IAVF_ERR_RING_FULL = -20, + IAVF_ERR_INVALID_PD_ID = -21, + IAVF_ERR_INVALID_QP_ID = -22, + IAVF_ERR_INVALID_CQ_ID = -23, + IAVF_ERR_INVALID_CEQ_ID = -24, + IAVF_ERR_INVALID_AEQ_ID = -25, + IAVF_ERR_INVALID_SIZE = -26, + IAVF_ERR_INVALID_ARP_INDEX = -27, + IAVF_ERR_INVALID_FPM_FUNC_ID = -28, + IAVF_ERR_QP_INVALID_MSG_SIZE = -29, + IAVF_ERR_QP_TOOMANY_WRS_POSTED = -30, + IAVF_ERR_INVALID_FRAG_COUNT = -31, + IAVF_ERR_QUEUE_EMPTY = -32, + IAVF_ERR_INVALID_ALIGNMENT = -33, + IAVF_ERR_FLUSHED_QUEUE = -34, + IAVF_ERR_INVALID_PUSH_PAGE_INDEX = -35, + IAVF_ERR_INVALID_IMM_DATA_SIZE = -36, + IAVF_ERR_TIMEOUT = -37, + IAVF_ERR_OPCODE_MISMATCH = -38, + IAVF_ERR_CQP_COMPL_ERROR = -39, + IAVF_ERR_INVALID_VF_ID = -40, + IAVF_ERR_INVALID_HMCFN_ID = -41, + IAVF_ERR_BACKING_PAGE_ERROR = -42, + IAVF_ERR_NO_PBLCHUNKS_AVAILABLE = -43, + IAVF_ERR_INVALID_PBLE_INDEX = -44, + IAVF_ERR_INVALID_SD_INDEX = -45, + IAVF_ERR_INVALID_PAGE_DESC_INDEX = -46, + IAVF_ERR_INVALID_SD_TYPE = -47, + IAVF_ERR_MEMCPY_FAILED = -48, + IAVF_ERR_INVALID_HMC_OBJ_INDEX = -49, + IAVF_ERR_INVALID_HMC_OBJ_COUNT = -50, + IAVF_ERR_INVALID_SRQ_ARM_LIMIT = -51, + IAVF_ERR_SRQ_ENABLED = -52, + IAVF_ERR_ADMIN_QUEUE_ERROR = -53, + IAVF_ERR_ADMIN_QUEUE_TIMEOUT = -54, + IAVF_ERR_BUF_TOO_SHORT = -55, + IAVF_ERR_ADMIN_QUEUE_FULL = -56, + IAVF_ERR_ADMIN_QUEUE_NO_WORK = -57, + IAVF_ERR_BAD_IWARP_CQE = -58, + IAVF_ERR_NVM_BLANK_MODE = -59, + IAVF_ERR_NOT_IMPLEMENTED = -60, + IAVF_ERR_PE_DOORBELL_NOT_ENABLED = -61, + IAVF_ERR_DIAG_TEST_FAILED = -62, + IAVF_ERR_NOT_READY = -63, + IAVF_NOT_SUPPORTED = -64, + IAVF_ERR_FIRMWARE_API_VERSION = -65, + IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR = -66, }; #endif /* _IAVF_STATUS_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h index 1474f5539751..1058e68a02b4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_trace.h +++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h @@ -17,8 +17,8 @@ /* See trace-events-sample.h for a detailed description of why this * guard clause is different from most normal include files. 
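(Stepping back to the iavf_status.h hunk just above: the move out of the i40e namespace is purely mechanical, and every numeric value is preserved, so status codes exchanged with the PF over virtchnl decode exactly as before.) As a hedged illustration only — no such helper exists in this patch — the renamed identifiers would surface in diagnostics like so:

/* Hypothetical logging helper; the enumerators are from the hunk
 * above, the function itself is illustrative.
 */
static const char *iavf_stat_str(enum iavf_status status)
{
	switch (status) {
	case IAVF_SUCCESS:
		return "success";
	case IAVF_ERR_ADMIN_QUEUE_NO_WORK:
		return "admin queue empty";
	case IAVF_ERR_ADMIN_QUEUE_TIMEOUT:
		return "admin queue timeout";
	default:
		return "unhandled iavf_status";
	}
}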
*/ -#if !defined(_I40E_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) -#define _I40E_TRACE_H_ +#if !defined(_IAVF_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _IAVF_TRACE_H_ #include <linux/tracepoint.h> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 06d1509d57f7..0cca1b589b56 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -190,7 +190,7 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, struct iavf_ring *tx_ring, int napi_budget) { - u16 i = tx_ring->next_to_clean; + int i = tx_ring->next_to_clean; struct iavf_tx_buffer *tx_buf; struct iavf_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; @@ -379,19 +379,19 @@ static inline unsigned int iavf_itr_divisor(struct iavf_q_vector *q_vector) unsigned int divisor; switch (q_vector->adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 1024; break; - case I40E_LINK_SPEED_25GB: - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_20GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 512; break; default: - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 256; break; - case I40E_LINK_SPEED_1GB: - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_100MB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 32; break; } @@ -1236,6 +1236,9 @@ static void iavf_add_rx_frag(struct iavf_ring *rx_ring, unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring)); #endif + if (!size) + return; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); @@ -1260,6 +1263,9 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, { struct iavf_rx_buffer *rx_buffer; + if (!size) + return NULL; + rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); @@ -1290,7 +1296,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer, unsigned int size) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + void *va; #if (PAGE_SIZE < 8192) unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; #else @@ -1299,7 +1305,10 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, unsigned int headlen; struct sk_buff *skb; + if (!rx_buffer) + return NULL; /* prefetch first cache line of first page */ + va = page_address(rx_buffer->page) + rx_buffer->page_offset; prefetch(va); #if L1_CACHE_BYTES < 128 prefetch(va + L1_CACHE_BYTES); @@ -1354,7 +1363,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer, unsigned int size) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + void *va; #if (PAGE_SIZE < 8192) unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; #else @@ -1363,7 +1372,10 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, #endif struct sk_buff *skb; + if (!rx_buffer) + return NULL; /* prefetch first cache line of first page */ + va = page_address(rx_buffer->page) + rx_buffer->page_offset; prefetch(va); #if L1_CACHE_BYTES < 128 prefetch(va + L1_CACHE_BYTES); @@ -1398,6 +1410,9 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer) { + if (!rx_buffer) + 
return; + if (iavf_can_reuse_rx_page(rx_buffer)) { /* hand second half of page back to the ring */ iavf_reuse_rx_page(rx_ring, rx_buffer); @@ -1496,11 +1511,12 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) * verified the descriptor has been written back. */ dma_rmb(); +#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT) + if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD)) + break; size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; - if (!size) - break; iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); rx_buffer = iavf_get_rx_buffer(rx_ring, size); @@ -1516,7 +1532,8 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; - rx_buffer->pagecnt_bias++; + if (rx_buffer) + rx_buffer->pagecnt_bias++; break; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index ca89583613fb..7190a40c540c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -7,7 +7,7 @@ #include "iavf_status.h" #include "iavf_osdep.h" #include "iavf_register.h" -#include "i40e_adminq.h" +#include "iavf_adminq.h" #include "iavf_devids.h" #define IAVF_RXQ_CTX_DBUFF_SHIFT 7 @@ -21,7 +21,7 @@ /* forward declaration */ struct iavf_hw; -typedef void (*I40E_ADMINQ_CALLBACK)(struct iavf_hw *, struct i40e_aq_desc *); +typedef void (*IAVF_ADMINQ_CALLBACK)(struct iavf_hw *, struct iavf_aq_desc *); /* Data type manipulation macros. */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index e64751da0921..d49d58a6de80 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -22,7 +22,7 @@ static int iavf_send_pf_msg(struct iavf_adapter *adapter, enum virtchnl_ops op, u8 *msg, u16 len) { struct iavf_hw *hw = &adapter->hw; - iavf_status err; + enum iavf_status err; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) return 0; /* nothing to see here, move along */ @@ -41,7 +41,7 @@ static int iavf_send_pf_msg(struct iavf_adapter *adapter, * * Send API version admin queue message to the PF. The reply is not checked * in this function. Returns 0 if the message was successfully - * sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. + * sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not. **/ int iavf_send_api_ver(struct iavf_adapter *adapter) { @@ -60,16 +60,16 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) * * Compare API versions with the PF. Must be called after admin queue is * initialized. Returns 0 if API versions match, -EIO if they do not, - * I40E_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors + * IAVF_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors * from the firmware are propagated. 
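(A note on the iavf_txrx.c hunks above, before the virtchnl changes continue: the RX cleanup loop previously bailed out on a zero-length descriptor, which wrongly ended the poll for legitimate zero-sized completions. It now terminates only when the descriptor-done bit is clear, and the buffer helpers are taught to tolerate size == 0 and a NULL rx_buffer.) A condensed sketch of the resulting flow, abbreviated from the hunks above:

	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	dma_rmb();	/* no other descriptor fields read before DD check */
	if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD))
		break;	/* descriptor not written back yet */

	size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
	       IAVF_RXD_QW1_LENGTH_PBUF_SHIFT;	/* may legitimately be 0 */

	rx_buffer = iavf_get_rx_buffer(rx_ring, size);	/* NULL when !size */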
**/ int iavf_verify_api_ver(struct iavf_adapter *adapter) { struct virtchnl_version_info *pf_vvi; struct iavf_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; + struct iavf_arq_event_info event; enum virtchnl_ops op; - iavf_status err; + enum iavf_status err; event.buf_len = IAVF_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); @@ -92,7 +92,7 @@ int iavf_verify_api_ver(struct iavf_adapter *adapter) } - err = (iavf_status)le32_to_cpu(event.desc.cookie_low); + err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); if (err) goto out_alloc; @@ -123,7 +123,7 @@ out: * * Send VF configuration request admin queue message to the PF. The reply * is not checked in this function. Returns 0 if the message was - * successfully sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. + * successfully sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not. **/ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) { @@ -189,9 +189,9 @@ static void iavf_validate_num_queues(struct iavf_adapter *adapter) int iavf_get_vf_config(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; + struct iavf_arq_event_info event; enum virtchnl_ops op; - iavf_status err; + enum iavf_status err; u16 len; len = sizeof(struct virtchnl_vf_resource) + @@ -216,7 +216,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter) break; } - err = (iavf_status)le32_to_cpu(event.desc.cookie_low); + err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len)); /* some PFs send more queues than we should have so validate that @@ -242,7 +242,8 @@ void iavf_configure_queues(struct iavf_adapter *adapter) struct virtchnl_vsi_queue_config_info *vqci; struct virtchnl_queue_pair_info *vqpi; int pairs = adapter->num_active_queues; - int i, len, max_frame = IAVF_MAX_RXBUFFER; + int i, max_frame = IAVF_MAX_RXBUFFER; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -251,8 +252,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) return; } adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES; - len = sizeof(struct virtchnl_vsi_queue_config_info) + - (sizeof(struct virtchnl_queue_pair_info) * pairs); + len = struct_size(vqci, qpair, pairs); vqci = kzalloc(len, GFP_KERNEL); if (!vqci) return; @@ -351,8 +351,9 @@ void iavf_map_queues(struct iavf_adapter *adapter) { struct virtchnl_irq_map_info *vimi; struct virtchnl_vector_map *vecmap; - int v_idx, q_vectors, len; struct iavf_q_vector *q_vector; + int v_idx, q_vectors; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -364,9 +365,7 @@ void iavf_map_queues(struct iavf_adapter *adapter) q_vectors = adapter->num_msix_vectors - NONQ_VECS; - len = sizeof(struct virtchnl_irq_map_info) + - (adapter->num_msix_vectors * - sizeof(struct virtchnl_vector_map)); + len = struct_size(vimi, vecmap, adapter->num_msix_vectors); vimi = kzalloc(len, GFP_KERNEL); if (!vimi) return; @@ -416,7 +415,7 @@ int iavf_request_queues(struct iavf_adapter *adapter, int num) return -EBUSY; } - vfres.num_queue_pairs = num; + vfres.num_queue_pairs = min_t(int, num, num_online_cpus()); adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; @@ -433,9 +432,10 @@ int iavf_request_queues(struct iavf_adapter *adapter, int num) void iavf_add_ether_addrs(struct iavf_adapter *adapter) { struct 
virtchnl_ether_addr_list *veal; - int len, i = 0, count = 0; struct iavf_mac_filter *f; + int i = 0, count = 0; bool more = false; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -457,15 +457,13 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter) } adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR; - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); if (len > IAVF_MAX_AQ_BUF_SIZE) { dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n"); count = (IAVF_MAX_AQ_BUF_SIZE - sizeof(struct virtchnl_ether_addr_list)) / sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); more = true; } @@ -505,8 +503,9 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) { struct virtchnl_ether_addr_list *veal; struct iavf_mac_filter *f, *ftmp; - int len, i = 0, count = 0; + int i = 0, count = 0; bool more = false; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -528,15 +527,13 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) } adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR; - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); if (len > IAVF_MAX_AQ_BUF_SIZE) { dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n"); count = (IAVF_MAX_AQ_BUF_SIZE - sizeof(struct virtchnl_ether_addr_list)) / sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); more = true; } veal = kzalloc(len, GFP_ATOMIC); @@ -938,22 +935,22 @@ static void iavf_print_link_message(struct iavf_adapter *adapter) } switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: speed = "40 G"; break; - case I40E_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_25GB: speed = "25 G"; break; - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_20GB: speed = "20 G"; break; - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: speed = "10 G"; break; - case I40E_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_1GB: speed = "1000 M"; break; - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_100MB: speed = "100 M"; break; default: @@ -973,7 +970,7 @@ static void iavf_print_link_message(struct iavf_adapter *adapter) void iavf_enable_channels(struct iavf_adapter *adapter) { struct virtchnl_tc_info *vti = NULL; - u16 len; + size_t len; int i; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { @@ -983,9 +980,7 @@ void iavf_enable_channels(struct iavf_adapter *adapter) return; } - len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) + - sizeof(struct virtchnl_tc_info); - + len = struct_size(vti, list, adapter->num_tc - 1); vti = kzalloc(len, GFP_KERNEL); if (!vti) return; @@ -1184,8 +1179,8 @@ void iavf_request_reset(struct iavf_adapter *adapter) * This function handles the reply messages. 
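(Summarizing the allocation-size conversions above: the open-coded sizeof(header) + count * sizeof(element) arithmetic is replaced by struct_size() from <linux/overflow.h>, which computes the same value with overflow checking, saturating to SIZE_MAX instead of wrapping, and the result now lands in a size_t rather than an int. One subtlety in iavf_enable_channels(): virtchnl_tc_info declares a one-element trailing array rather than a true flexible array member, hence the element count of num_tc - 1.) A compilable toy with made-up names showing the idiom:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_list {
	u16 count;
	u32 items[];			/* flexible array member */
};

static struct demo_list *demo_alloc(u16 n)
{
	/* same as sizeof(*d) + n * sizeof(u32), but saturates on overflow */
	struct demo_list *d = kzalloc(struct_size(d, items, n), GFP_KERNEL);

	if (d)
		d->count = n;
	return d;
}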
**/ void iavf_virtchnl_completion(struct iavf_adapter *adapter, - enum virtchnl_ops v_opcode, iavf_status v_retval, - u8 *msg, u16 msglen) + enum virtchnl_ops v_opcode, + enum iavf_status v_retval, u8 *msg, u16 msglen) { struct net_device *netdev = adapter->netdev; @@ -1238,7 +1233,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) { adapter->flags |= IAVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } break; default: diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 792e6e42030e..9ee6b55553c0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -44,15 +44,22 @@ extern const char ice_drv_ver[]; #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 -#define ICE_MIN_NUM_DESC ICE_REQ_DESC_MULTIPLE +#define ICE_MIN_NUM_DESC 64 #define ICE_MAX_NUM_DESC 8160 -/* set default number of Rx/Tx descriptors to the minimum between - * ICE_MAX_NUM_DESC and the number of descriptors to fill up an entire page +#define ICE_DFLT_MIN_RX_DESC 512 +/* if the default number of Rx descriptors between ICE_MAX_NUM_DESC and the + * number of descriptors to fill up an entire page is greater than or equal to + * ICE_DFLT_MIN_RX_DESC set it based on page size, otherwise set it to + * ICE_DFLT_MIN_RX_DESC + */ +#define ICE_DFLT_NUM_RX_DESC \ + min_t(u16, ICE_MAX_NUM_DESC, \ + max_t(u16, ALIGN(PAGE_SIZE / sizeof(union ice_32byte_rx_desc), \ + ICE_REQ_DESC_MULTIPLE), \ + ICE_DFLT_MIN_RX_DESC)) +/* set default number of Tx descriptors to the minimum between ICE_MAX_NUM_DESC + * and the number of descriptors to fill up an entire page */ -#define ICE_DFLT_NUM_RX_DESC min_t(u16, ICE_MAX_NUM_DESC, \ - ALIGN(PAGE_SIZE / \ - sizeof(union ice_32byte_rx_desc), \ - ICE_REQ_DESC_MULTIPLE)) #define ICE_DFLT_NUM_TX_DESC min_t(u16, ICE_MAX_NUM_DESC, \ ALIGN(PAGE_SIZE / \ sizeof(struct ice_tx_desc), \ @@ -160,7 +167,7 @@ struct ice_tc_cfg { struct ice_res_tracker { u16 num_entries; - u16 search_hint; + u16 end; u16 list[1]; }; @@ -182,6 +189,7 @@ struct ice_sw { }; enum ice_state { + __ICE_TESTING, __ICE_DOWN, __ICE_NEEDS_RESTART, __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ @@ -244,8 +252,7 @@ struct ice_vsi { u32 rx_buf_failed; u32 rx_page_failed; int num_q_vectors; - int sw_base_vector; /* Irq base for OS reserved vectors */ - int hw_base_vector; /* HW (absolute) index of a vector */ + int base_vector; /* IRQ base for OS reserved vectors */ enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ @@ -277,10 +284,10 @@ struct ice_vsi { struct list_head tmp_sync_list; /* MAC filters to be synced */ struct list_head tmp_unsync_list; /* MAC filters to be unsynced */ - u8 irqs_ready; - u8 current_isup; /* Sync 'link up' logging */ - u8 stat_offsets_loaded; - u8 vlan_ena; + u8 irqs_ready:1; + u8 current_isup:1; /* Sync 'link up' logging */ + u8 stat_offsets_loaded:1; + u8 vlan_ena:1; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -330,7 +337,7 @@ enum ice_pf_flags { ICE_FLAG_DCB_CAPABLE, ICE_FLAG_DCB_ENA, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, - ICE_FLAG_DISABLE_FW_LLDP, + ICE_FLAG_ENABLE_FW_LLDP, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -340,10 +347,12 @@ struct ice_pf { /* OS reserved IRQ details */ struct 
msix_entry *msix_entries; - struct ice_res_tracker *sw_irq_tracker; - - /* HW reserved Interrupts for this PF */ - struct ice_res_tracker *hw_irq_tracker; + struct ice_res_tracker *irq_tracker; + /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the + * number of MSIX vectors needed for all SR-IOV VFs from the number of + * MSIX vectors allowed on this PF. + */ + u16 sriov_base_vector; struct ice_vsi **vsi; /* VSIs created by the driver */ struct ice_sw *first_sw; /* first switch created by firmware */ @@ -365,10 +374,8 @@ struct ice_pf { struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ u32 msg_enable; u32 hw_csum_rx_error; - u32 sw_oicr_idx; /* Other interrupt cause SW vector index */ + u32 oicr_idx; /* Other interrupt cause MSIX vector index */ u32 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ - u32 hw_oicr_idx; /* Other interrupt cause vector HW index */ - u32 num_avail_hw_msix; /* remaining HW MSIX vectors left unclaimed */ u32 num_lan_msix; /* Total MSIX vectors for base driver */ u16 num_lan_tx; /* num LAN Tx queues setup */ u16 num_lan_rx; /* num LAN Rx queues setup */ @@ -384,7 +391,7 @@ struct ice_pf { struct ice_hw_port_stats stats; struct ice_hw_port_stats stats_prev; struct ice_hw hw; - u8 stat_prev_loaded; /* has previous stats been loaded */ + u8 stat_prev_loaded:1; /* has previous stats been loaded */ #ifdef CONFIG_DCB u16 dcbx_cap; #endif /* CONFIG_DCB */ @@ -392,6 +399,7 @@ struct ice_pf { unsigned long tx_timeout_last_recovery; u32 tx_timeout_recovery_level; char int_name[ICE_INT_NAME_STR_LEN]; + u32 sw_int_count; }; struct ice_netdev_priv { @@ -409,7 +417,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, struct ice_q_vector *q_vector) { u32 vector = (vsi && q_vector) ? 
q_vector->reg_idx : - ((struct ice_pf *)hw->back)->hw_oicr_idx; + ((struct ice_pf *)hw->back)->oicr_idx; int itr = ICE_ITR_NONE; u32 val; @@ -444,17 +452,22 @@ ice_find_vsi_by_type(struct ice_pf *pf, enum ice_vsi_type type) return NULL; } +int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); +int ice_vsi_setup_rx_rings(struct ice_vsi *vsi); void ice_set_ethtool_ops(struct net_device *netdev); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); +int ice_vsi_cfg(struct ice_vsi *vsi); +struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); -void ice_napi_del(struct ice_vsi *vsi); #ifdef CONFIG_DCB int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked); void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked); #endif /* CONFIG_DCB */ +int ice_open(struct net_device *netdev); +int ice_stop(struct net_device *netdev); #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 6ef083002f5b..765e3c2ed045 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -35,8 +35,8 @@ struct ice_aqc_get_ver { /* Queue Shutdown (direct 0x0003) */ struct ice_aqc_q_shutdown { -#define ICE_AQC_DRIVER_UNLOADING BIT(0) __le32 driver_unloading; +#define ICE_AQC_DRIVER_UNLOADING BIT(0) u8 reserved[12]; }; @@ -120,11 +120,9 @@ struct ice_aqc_manage_mac_read { #define ICE_AQC_MAN_MAC_WOL_ADDR_VALID BIT(7) #define ICE_AQC_MAN_MAC_READ_S 4 #define ICE_AQC_MAN_MAC_READ_M (0xF << ICE_AQC_MAN_MAC_READ_S) - u8 lport_num; - u8 lport_num_valid; -#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID BIT(0) + u8 rsvd[2]; u8 num_addr; /* Used in response */ - u8 reserved[3]; + u8 rsvd1[3]; __le32 addr_high; __le32 addr_low; }; @@ -140,7 +138,7 @@ struct ice_aqc_manage_mac_read_resp { /* Manage MAC address, write command - direct (0x0108) */ struct ice_aqc_manage_mac_write { - u8 port_num; + u8 rsvd; u8 flags; #define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0) #define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1) @@ -920,6 +918,8 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_PHY_EN_LINK BIT(3) #define ICE_AQC_PHY_AN_MODE BIT(4) #define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5) +#define ICE_AQC_PHY_EN_AUTO_FEC BIT(7) +#define ICE_AQC_PHY_CAPS_MASK ICE_M(0xff, 0) u8 low_power_ctrl; #define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0) __le16 eee_cap; @@ -932,6 +932,7 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_PHY_EEE_EN_40GBASE_KR4 BIT(6) __le16 eeer_value; u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */ + u8 phy_fw_ver[8]; u8 link_fec_options; #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN BIT(0) #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1) @@ -940,6 +941,8 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_PHY_FEC_25G_RS_544_REQ BIT(4) #define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6) #define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7) +#define ICE_AQC_PHY_FEC_MASK ICE_M(0xdf, 0) + u8 rsvd1; /* Byte 35 reserved */ u8 extended_compliance_code; #define ICE_MODULE_TYPE_TOTAL_BYTE 3 u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE]; @@ -954,13 +957,14 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS 0xA0 #define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS 0x86 u8 qualified_module_count; 
+ u8 rsvd2[7]; /* Bytes 47:41 reserved */ #define ICE_AQC_QUAL_MOD_COUNT_MAX 16 struct { u8 v_oui[3]; - u8 rsvd1; + u8 rsvd3; u8 v_part[16]; __le32 v_rev; - __le64 rsvd8; + __le64 rsvd4; } qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX]; }; @@ -1062,6 +1066,7 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_25G_KR_FEC_EN BIT(0) #define ICE_AQ_LINK_25G_RS_528_FEC_EN BIT(1) #define ICE_AQ_LINK_25G_RS_544_FEC_EN BIT(2) +#define ICE_AQ_FEC_MASK ICE_M(0x7, 0) /* Pacing Config */ #define ICE_AQ_CFG_PACING_S 3 #define ICE_AQ_CFG_PACING_M (0xF << ICE_AQ_CFG_PACING_S) @@ -1112,6 +1117,14 @@ struct ice_aqc_set_event_mask { u8 reserved1[6]; }; +/* Set MAC Loopback command (direct 0x0620) */ +struct ice_aqc_set_mac_lb { + u8 lb_mode; +#define ICE_AQ_MAC_LB_EN BIT(0) +#define ICE_AQ_MAC_LB_OSC_CLK BIT(1) + u8 reserved[15]; +}; + /* Set Port Identification LED (direct, 0x06E9) */ struct ice_aqc_set_port_id_led { u8 lport_num; @@ -1145,6 +1158,17 @@ struct ice_aqc_nvm { __le32 addr_low; }; +/* NVM Checksum Command (direct, 0x0706) */ +struct ice_aqc_nvm_checksum { + u8 flags; +#define ICE_AQC_NVM_CHECKSUM_VERIFY BIT(0) +#define ICE_AQC_NVM_CHECKSUM_RECALC BIT(1) + u8 rsvd; + __le16 checksum; /* Used only by response */ +#define ICE_AQC_NVM_CHECKSUM_CORRECT 0xBABA + u8 rsvd2[12]; +}; + /** * Send to PF command (indirect 0x0801) ID is only used by PF * @@ -1249,7 +1273,7 @@ struct ice_aqc_get_cee_dcb_cfg_resp { }; /* Set Local LLDP MIB (indirect 0x0A08) - * Used to replace the local MIB of a given LLDP agent. e.g. DCBx + * Used to replace the local MIB of a given LLDP agent. e.g. DCBX */ struct ice_aqc_lldp_set_local_mib { u8 type; @@ -1266,7 +1290,7 @@ struct ice_aqc_lldp_set_local_mib { }; /* Stop/Start LLDP Agent (direct 0x0A09) - * Used for stopping/starting specific LLDP agent. e.g. DCBx. + * Used for stopping/starting specific LLDP agent. e.g. DCBX. * The same structure is used for the response, with the command field * being used as the status field. 
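(On the two admin queue commands introduced above, Set MAC Loopback 0x0620 and NVM Checksum 0x0706: both are "direct" commands, meaning the whole payload travels in the descriptor's 16-byte params area, which is why each struct pads out with reserved bytes to exactly 16.) Purely as an illustration of that invariant — the driver does not necessarily express it this way — a build-time check would read:

/* Illustrative only: direct AQ command payloads must fill exactly the
 * 16-byte params area of struct ice_aq_desc.
 */
static_assert(sizeof(struct ice_aqc_set_mac_lb) == 16,
	      "direct AQ command must be 16 bytes");
static_assert(sizeof(struct ice_aqc_nvm_checksum) == 16,
	      "direct AQ command must be 16 bytes");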
*/ @@ -1539,6 +1563,7 @@ struct ice_aq_desc { struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_query_port_ets port_ets; struct ice_aqc_nvm nvm; + struct ice_aqc_nvm_checksum nvm_checksum; struct ice_aqc_pf_vf_msg virt; struct ice_aqc_lldp_get_mib lldp_get_mib; struct ice_aqc_lldp_set_mib_change lldp_set_event; @@ -1554,6 +1579,7 @@ struct ice_aq_desc { struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res; struct ice_aqc_fw_logging fw_logging; struct ice_aqc_get_clear_fw_log get_clear_fw_log; + struct ice_aqc_set_mac_lb set_mac_lb; struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; struct ice_aqc_set_event_mask set_event_mask; struct ice_aqc_get_link_status get_link_status; @@ -1642,10 +1668,12 @@ enum ice_adminq_opc { ice_aqc_opc_restart_an = 0x0605, ice_aqc_opc_get_link_status = 0x0607, ice_aqc_opc_set_event_mask = 0x0613, + ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_set_port_id_led = 0x06E9, /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, + ice_aqc_opc_nvm_checksum = 0x0706, /* PF/VF mailbox commands */ ice_mbx_opc_send_msg_to_pf = 0x0801, @@ -1671,6 +1699,7 @@ enum ice_adminq_opc { /* debug commands */ ice_aqc_opc_fw_logging = 0xFF09, + ice_aqc_opc_fw_logging_info = 0xFF10, }; #endif /* _ICE_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index da7878529929..2e0731c1e1a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -51,9 +51,6 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) */ void ice_dev_onetime_setup(struct ice_hw *hw) { - /* configure Rx - set non pxe mode */ - wr32(hw, GLLAN_RCTL_0, 0x1); - #define MBX_PF_VT_PFALLOC 0x00231E80 /* set VFs per PF */ wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF)); @@ -307,6 +304,8 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, hw_link_info->an_info = link_data.an_info; hw_link_info->ext_info = link_data.ext_info; hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size); + hw_link_info->fec_info = link_data.cfg & ICE_AQ_FEC_MASK; + hw_link_info->topo_media_conflict = link_data.topo_media_conflict; hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M; /* update fc info */ @@ -476,6 +475,49 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) ICE_FW_LOG_DESC_SIZE(ICE_AQC_FW_LOG_ID_MAX) /** + * ice_get_fw_log_cfg - get FW logging configuration + * @hw: pointer to the HW struct + */ +static enum ice_status ice_get_fw_log_cfg(struct ice_hw *hw) +{ + struct ice_aqc_fw_logging_data *config; + struct ice_aq_desc desc; + enum ice_status status; + u16 size; + + size = ICE_FW_LOG_DESC_SIZE_MAX; + config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL); + if (!config) + return ICE_ERR_NO_MEMORY; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_BUF); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + status = ice_aq_send_cmd(hw, &desc, config, size, NULL); + if (!status) { + u16 i; + + /* Save FW logging information into the HW structure */ + for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) { + u16 v, m, flgs; + + v = le16_to_cpu(config->entry[i]); + m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S; + flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S; + + if (m < ICE_AQC_FW_LOG_ID_MAX) + hw->fw_log.evnts[m].cur = flgs; + } + } + + devm_kfree(ice_hw_to_dev(hw), config); + + return status; +} + +/** * ice_cfg_fw_log - configure FW logging * @hw: pointer to the HW struct 
* @enable: enable certain FW logging events if true, disable all if false @@ -529,6 +571,11 @@ static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable) (!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq))) return 0; + /* Get current FW log settings */ + status = ice_get_fw_log_cfg(hw); + if (status) + return status; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging); cmd = &desc.params.fw_logging; @@ -634,17 +681,17 @@ out: */ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf) { - ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg Start ]\n"); - ice_debug_array(hw, ICE_DBG_AQ_MSG, 16, 1, (u8 *)buf, + ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n"); + ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf, le16_to_cpu(desc->datalen)); - ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg End ]\n"); + ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n"); } /** * ice_get_itr_intrl_gran - determine int/intrl granularity * @hw: pointer to the HW struct * - * Determines the itr/intrl granularities based on the maximum aggregate + * Determines the ITR/intrl granularities based on the maximum aggregate * bandwidth according to the device's configuration during power-on. */ static void ice_get_itr_intrl_gran(struct ice_hw *hw) @@ -815,6 +862,10 @@ err_unroll_cqinit: /** * ice_deinit_hw - unroll initialization operations done by ice_init_hw * @hw: pointer to the hardware structure + * + * This should be called only during nominal operation, not as a result of + * ice_init_hw() failing since ice_init_hw() will take care of unrolling + * applicable initializations if it fails for any reason. */ void ice_deinit_hw(struct ice_hw *hw) { @@ -1447,6 +1498,7 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, struct ice_hw_func_caps *func_p = NULL; struct ice_hw_dev_caps *dev_p = NULL; struct ice_hw_common_caps *caps; + char const *prefix; u32 i; if (!buf) @@ -1457,9 +1509,11 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, if (opc == ice_aqc_opc_list_dev_caps) { dev_p = &hw->dev_caps; caps = &dev_p->common_cap; + prefix = "dev cap"; } else if (opc == ice_aqc_opc_list_func_caps) { func_p = &hw->func_caps; caps = &func_p->common_cap; + prefix = "func cap"; } else { ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n"); return; @@ -1475,28 +1529,29 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, case ICE_AQC_CAPS_VALID_FUNCTIONS: caps->valid_functions = number; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Valid Functions = %d\n", + "%s: valid functions = %d\n", prefix, caps->valid_functions); break; case ICE_AQC_CAPS_SRIOV: caps->sr_iov_1_1 = (number == 1); ice_debug(hw, ICE_DBG_INIT, - "HW caps: SR-IOV = %d\n", caps->sr_iov_1_1); + "%s: SR-IOV = %d\n", prefix, + caps->sr_iov_1_1); break; case ICE_AQC_CAPS_VF: if (dev_p) { dev_p->num_vfs_exposed = number; ice_debug(hw, ICE_DBG_INIT, - "HW caps: VFs exposed = %d\n", + "%s: VFs exposed = %d\n", prefix, dev_p->num_vfs_exposed); } else if (func_p) { func_p->num_allocd_vfs = number; func_p->vf_base_id = logical_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: VFs allocated = %d\n", + "%s: VFs allocated = %d\n", prefix, func_p->num_allocd_vfs); ice_debug(hw, ICE_DBG_INIT, - "HW caps: VF base_id = %d\n", + "%s: VF base_id = %d\n", prefix, func_p->vf_base_id); } break; @@ -1504,69 +1559,69 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, if (dev_p) { dev_p->num_vsi_allocd_to_host = number; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Dev.VSI cnt = %d\n", + "%s: num VSI alloc to 
host = %d\n", + prefix, dev_p->num_vsi_allocd_to_host); } else if (func_p) { func_p->guar_num_vsi = ice_get_num_per_func(hw, ICE_MAX_VSI); ice_debug(hw, ICE_DBG_INIT, - "HW caps: Func.VSI cnt = %d\n", - number); + "%s: num guaranteed VSI (fw) = %d\n", + prefix, number); + ice_debug(hw, ICE_DBG_INIT, + "%s: num guaranteed VSI = %d\n", + prefix, func_p->guar_num_vsi); } break; case ICE_AQC_CAPS_RSS: caps->rss_table_size = number; caps->rss_table_entry_width = logical_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: RSS table size = %d\n", + "%s: RSS table size = %d\n", prefix, caps->rss_table_size); ice_debug(hw, ICE_DBG_INIT, - "HW caps: RSS table width = %d\n", + "%s: RSS table width = %d\n", prefix, caps->rss_table_entry_width); break; case ICE_AQC_CAPS_RXQS: caps->num_rxq = number; caps->rxq_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Num Rx Qs = %d\n", caps->num_rxq); + "%s: num Rx queues = %d\n", prefix, + caps->num_rxq); ice_debug(hw, ICE_DBG_INIT, - "HW caps: Rx first queue ID = %d\n", + "%s: Rx first queue ID = %d\n", prefix, caps->rxq_first_id); break; case ICE_AQC_CAPS_TXQS: caps->num_txq = number; caps->txq_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Num Tx Qs = %d\n", caps->num_txq); + "%s: num Tx queues = %d\n", prefix, + caps->num_txq); ice_debug(hw, ICE_DBG_INIT, - "HW caps: Tx first queue ID = %d\n", + "%s: Tx first queue ID = %d\n", prefix, caps->txq_first_id); break; case ICE_AQC_CAPS_MSIX: caps->num_msix_vectors = number; caps->msix_vector_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: MSIX vector count = %d\n", + "%s: MSIX vector count = %d\n", prefix, caps->num_msix_vectors); ice_debug(hw, ICE_DBG_INIT, - "HW caps: MSIX first vector index = %d\n", + "%s: MSIX first vector index = %d\n", prefix, caps->msix_vector_first_id); break; case ICE_AQC_CAPS_MAX_MTU: caps->max_mtu = number; - if (dev_p) - ice_debug(hw, ICE_DBG_INIT, - "HW caps: Dev.MaxMTU = %d\n", - caps->max_mtu); - else if (func_p) - ice_debug(hw, ICE_DBG_INIT, - "HW caps: func.MaxMTU = %d\n", - caps->max_mtu); + ice_debug(hw, ICE_DBG_INIT, "%s: max MTU = %d\n", + prefix, caps->max_mtu); break; default: ice_debug(hw, ICE_DBG_INIT, - "HW caps: Unknown capability[%d]: 0x%x\n", i, - cap); + "%s: unknown capability[%d]: 0x%x\n", prefix, + i, cap); break; } } @@ -1947,36 +2002,37 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport, */ enum ice_status ice_update_link_info(struct ice_port_info *pi) { - struct ice_aqc_get_phy_caps_data *pcaps; - struct ice_phy_info *phy_info; + struct ice_link_status *li; enum ice_status status; - struct ice_hw *hw; if (!pi) return ICE_ERR_PARAM; - hw = pi->hw; - - pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); - if (!pcaps) - return ICE_ERR_NO_MEMORY; + li = &pi->phy.link_info; - phy_info = &pi->phy; status = ice_aq_get_link_info(pi, true, NULL, NULL); if (status) - goto out; + return status; + + if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) { + struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_hw *hw; + + hw = pi->hw; + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), + GFP_KERNEL); + if (!pcaps) + return ICE_ERR_NO_MEMORY; - if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, NULL); - if (status) - goto out; + if (!status) + memcpy(li->module_type, &pcaps->module_type, + sizeof(li->module_type)); - memcpy(phy_info->link_info.module_type, &pcaps->module_type, - sizeof(phy_info->link_info.module_type)); + devm_kfree(ice_hw_to_dev(hw), 
pcaps); } -out: - devm_kfree(ice_hw_to_dev(hw), pcaps); + return status; } @@ -2081,6 +2137,74 @@ out: } /** + * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data + * @caps: PHY ability structure to copy date from + * @cfg: PHY configuration structure to copy data to + * + * Helper function to copy AQC PHY get ability data to PHY set configuration + * data structure + */ +void +ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps, + struct ice_aqc_set_phy_cfg_data *cfg) +{ + if (!caps || !cfg) + return; + + cfg->phy_type_low = caps->phy_type_low; + cfg->phy_type_high = caps->phy_type_high; + cfg->caps = caps->caps; + cfg->low_power_ctrl = caps->low_power_ctrl; + cfg->eee_cap = caps->eee_cap; + cfg->eeer_value = caps->eeer_value; + cfg->link_fec_opt = caps->link_fec_options; +} + +/** + * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode + * @cfg: PHY configuration data to set FEC mode + * @fec: FEC mode to configure + * + * Caller should copy ice_aqc_get_phy_caps_data.caps ICE_AQC_PHY_EN_AUTO_FEC + * (bit 7) and ice_aqc_get_phy_caps_data.link_fec_options to cfg.caps + * ICE_AQ_PHY_ENA_AUTO_FEC (bit 7) and cfg.link_fec_options before calling. + */ +void +ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec) +{ + switch (fec) { + case ICE_FEC_BASER: + /* Clear auto FEC and RS bits, and AND BASE-R ability + * bits and OR request bits. + */ + cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; + cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN | + ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN; + cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ | + ICE_AQC_PHY_FEC_25G_KR_REQ; + break; + case ICE_FEC_RS: + /* Clear auto FEC and BASE-R bits, and AND RS ability + * bits and OR request bits. + */ + cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; + cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN; + cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ | + ICE_AQC_PHY_FEC_25G_RS_544_REQ; + break; + case ICE_FEC_NONE: + /* Clear auto FEC and all FEC option bits. */ + cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; + cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK; + break; + case ICE_FEC_AUTO: + /* AND auto FEC bit, and all caps bits. 
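(Pausing the switch above at its AUTO case: ice_cfg_phy_fec() edits a set-config structure that the caller is expected to have pre-seeded from get-capabilities data, which is exactly the job of the new ice_copy_phy_caps_to_cfg() helper.) A sketch of the assumed call sequence, with error handling elided — not a verbatim driver excerpt:

/* Assumed usage: read abilities, seed the config from them, then
 * apply the requested FEC mode.
 */
ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, NULL);
ice_copy_phy_caps_to_cfg(pcaps, &cfg);
ice_cfg_phy_fec(&cfg, ICE_FEC_RS);	/* request Reed-Solomon FEC */
ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL);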
*/ + cfg->caps &= ICE_AQC_PHY_CAPS_MASK; + break; + } +} + +/** * ice_get_link_status - get status of the HW network link * @pi: port information structure * @link_up: pointer to bool (true/false = linkup/linkdown) @@ -2169,6 +2293,29 @@ ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, } /** + * ice_aq_set_mac_loopback + * @hw: pointer to the HW struct + * @ena_lpbk: Enable or Disable loopback + * @cd: pointer to command details structure or NULL + * + * Enable/disable loopback on a given port + */ +enum ice_status +ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd) +{ + struct ice_aqc_set_mac_lb *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.set_mac_lb; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb); + if (ena_lpbk) + cmd->lb_mode = ICE_AQ_MAC_LB_EN; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** * ice_aq_set_port_id_led * @pi: pointer to the port information * @is_orig_mode: is this LED set to original mode (by the net-list) @@ -2552,7 +2699,7 @@ do_aq: ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n", vmvf_num, hw->adminq.sq_last_status); else - ice_debug(hw, ICE_DBG_SCHED, "disable Q %d failed %d\n", + ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n", le16_to_cpu(qg_list[0].q_id[0]), hw->adminq.sq_last_status); } @@ -2924,7 +3071,6 @@ ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues, if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) return ICE_ERR_CFG; - if (!num_queues) { /* if queue is disabled already yet the disable queue command * has to be sent to complete the VF reset, then call diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index f1ddebf45231..d1f8353fe6bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -9,6 +9,8 @@ #include "ice_switch.h" #include <linux/avf/virtchnl.h> +enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw); + void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len); enum ice_status ice_init_hw(struct ice_hw *hw); @@ -84,7 +86,11 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport, enum ice_status ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update); - +void +ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec); +void +ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps, + struct ice_aqc_set_phy_cfg_data *cfg); enum ice_status ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, struct ice_sq_cd *cd); @@ -95,6 +101,9 @@ enum ice_status ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, struct ice_sq_cd *cd); enum ice_status +ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd); + +enum ice_status ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, struct ice_sq_cd *cd); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index cc8cb5fdcdc1..e91ac4df0242 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -439,7 +439,7 @@ do { \ /* free the buffer info list */ \ if ((qi)->ring.cmd_buf) \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ - /* free dma head */ \ + /* free DMA head */ \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ } while (0) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h 
b/drivers/net/ethernet/intel/ice/ice_controlq.h index e0585394d984..44945c2165d8 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -35,7 +35,7 @@ enum ice_ctl_q { #define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */ struct ice_ctl_q_ring { - void *dma_head; /* Virtual address to dma head */ + void *dma_head; /* Virtual address to DMA head */ struct ice_dma_mem desc_buf; /* descriptor ring memory */ void *cmd_buf; /* command buffer memory */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 8bbf48e04a1c..c2002ded65f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -82,12 +82,14 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update, * @hw: pointer to the HW struct * @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown * False if LLDP Agent needs to be Stopped + * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across + * reboots * @cd: pointer to command details structure or NULL * * Stop or Shutdown the embedded LLDP Agent (0x0A05) */ enum ice_status -ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, +ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist, struct ice_sq_cd *cd) { struct ice_aqc_lldp_stop *cmd; @@ -100,17 +102,22 @@ ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, if (shutdown_lldp_agent) cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN; + if (persist) + cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS; + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } /** * ice_aq_start_lldp * @hw: pointer to the HW struct + * @persist: True if Start of LLDP Agent needs to be persistent across reboots * @cd: pointer to command details structure or NULL * * Start the embedded LLDP Agent on all ports. 
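(The new persist parameter above ORs ICE_AQ_LLDP_AGENT_PERSIST_DIS into the stop command so the agent stays down across reboots; the start path gains the symmetric ICE_AQ_LLDP_AGENT_PERSIST_ENA below.) For illustration, shutting the FW agent down persistently would be:

/* Assumed usage: shut down (not just stop) the FW LLDP agent and make
 * the change persist across reboots; NULL means no command details.
 */
status = ice_aq_stop_lldp(hw, true, true, NULL);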
(0x0A06) */ -enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd) +enum ice_status +ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd) { struct ice_aqc_lldp_start *cmd; struct ice_aq_desc desc; @@ -121,6 +128,9 @@ enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd) cmd->command = ICE_AQ_LLDP_AGENT_START; + if (persist) + cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA; + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } @@ -163,7 +173,7 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, * * Get the DCBX status from the Firmware */ -u8 ice_get_dcbx_status(struct ice_hw *hw) +static u8 ice_get_dcbx_status(struct ice_hw *hw) { u32 reg; @@ -614,7 +624,8 @@ ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) * * Parse DCB configuration from the LLDPDU */ -enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg) +static enum ice_status +ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg) { struct ice_lldp_org_tlv *tlv; enum ice_status ret = 0; @@ -658,13 +669,13 @@ enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg) /** * ice_aq_get_dcb_cfg * @hw: pointer to the HW struct - * @mib_type: mib type for the query + * @mib_type: MIB type for the query * @bridgetype: bridge type for the query (remote) * @dcbcfg: store for LLDPDU data * * Query DCB configuration from the firmware */ -static enum ice_status +enum ice_status ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, struct ice_dcbx_cfg *dcbcfg) { @@ -689,13 +700,13 @@ ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, } /** - * ice_aq_start_stop_dcbx - Start/Stop DCBx service in FW + * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW * @hw: pointer to the HW struct - * @start_dcbx_agent: True if DCBx Agent needs to be started - * False if DCBx Agent needs to be stopped - * @dcbx_agent_status: FW indicates back the DCBx agent status - * True if DCBx Agent is active - * False if DCBx Agent is stopped + * @start_dcbx_agent: True if DCBX Agent needs to be started + * False if DCBX Agent needs to be stopped + * @dcbx_agent_status: FW indicates back the DCBX agent status + * True if DCBX Agent is active + * False if DCBX Agent is stopped * @cd: pointer to command details structure or NULL * * Start/Stop the embedded dcbx Agent. 
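(Note the visibility shuffle in ice_dcb.c above: ice_get_dcbx_status() and ice_lldp_to_dcb_cfg() become static, while ice_aq_get_dcb_cfg() loses its static so the DCB library code can query MIBs directly.) The MIB-change handler later in this diff calls it like this:

/* Refresh the cached remote DCBX configuration from the nearest
 * bridge (taken from the handler later in this diff).
 */
ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
			 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
			 &pi->remote_dcbx_cfg);
if (ret)
	dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n");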
In case that this wrapper function diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h index e7d4416e3a66..522e1452abe2 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb.h @@ -120,8 +120,9 @@ struct ice_cee_app_prio { u8 prio_map; } __packed; -u8 ice_get_dcbx_status(struct ice_hw *hw); -enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg); +enum ice_status +ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, + struct ice_dcbx_cfg *dcbcfg); enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi); enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi); enum ice_status ice_init_dcb(struct ice_hw *hw); @@ -131,9 +132,10 @@ ice_query_port_ets(struct ice_port_info *pi, struct ice_sq_cd *cmd_details); #ifdef CONFIG_DCB enum ice_status -ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, +ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist, struct ice_sq_cd *cd); -enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd); +enum ice_status +ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd); enum ice_status ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent, bool *dcbx_agent_status, struct ice_sq_cd *cd); @@ -144,6 +146,7 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update, static inline enum ice_status ice_aq_stop_lldp(struct ice_hw __always_unused *hw, bool __always_unused shutdown_lldp_agent, + bool __always_unused persist, struct ice_sq_cd __always_unused *cd) { return 0; @@ -151,6 +154,7 @@ ice_aq_stop_lldp(struct ice_hw __always_unused *hw, static inline enum ice_status ice_aq_start_lldp(struct ice_hw __always_unused *hw, + bool __always_unused persist, struct ice_sq_cd __always_unused *cd) { return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 3e81af1884fc..fe88b127ca42 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -120,12 +120,14 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf) tc_map = ICE_DFLT_TRAFFIC_CLASS; ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map); - if (ret) + if (ret) { dev_err(&pf->pdev->dev, "Failed to config TC for VSI index: %d\n", pf->vsi[v]->idx); - else - ice_vsi_map_rings_to_vectors(pf->vsi[v]); + continue; + } + + ice_vsi_map_rings_to_vectors(pf->vsi[v]); } } @@ -133,8 +135,10 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf) * ice_pf_dcb_cfg - Apply new DCB configuration * @pf: pointer to the PF struct * @new_cfg: DCBX config to apply + * @locked: is the RTNL held */ -static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg) +static +int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) { struct ice_dcbx_cfg *old_cfg, *curr_cfg; struct ice_aqc_port_ets_elem buf = { 0 }; @@ -163,7 +167,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg) /* avoid race conditions by holding the lock while disabling and * re-enabling the VSI */ - rtnl_lock(); + if (!locked) + rtnl_lock(); ice_pf_dis_all_vsi(pf, true); memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg)); @@ -192,7 +197,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg) out: ice_pf_ena_all_vsi(pf, true); - rtnl_unlock(); + if (!locked) + rtnl_unlock(); devm_kfree(&pf->pdev->dev, old_cfg); return ret; } @@ -271,15 +277,16 @@ dcb_error: prev_cfg->etscfg.tcbwtable[0] = 
ICE_TC_MAX_BW; prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec)); - ice_pf_dcb_cfg(pf, prev_cfg); + ice_pf_dcb_cfg(pf, prev_cfg, false); devm_kfree(&pf->pdev->dev, prev_cfg); } /** * ice_dcb_init_cfg - set the initial DCB config in SW - * @pf: pf to apply config to + * @pf: PF to apply config to + * @locked: Is the RTNL held */ -static int ice_dcb_init_cfg(struct ice_pf *pf) +static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) { struct ice_dcbx_cfg *newcfg; struct ice_port_info *pi; @@ -294,7 +301,7 @@ static int ice_dcb_init_cfg(struct ice_pf *pf) memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg)); dev_info(&pf->pdev->dev, "Configuring initial DCB values\n"); - if (ice_pf_dcb_cfg(pf, newcfg)) + if (ice_pf_dcb_cfg(pf, newcfg, locked)) ret = -EINVAL; devm_kfree(&pf->pdev->dev, newcfg); @@ -304,9 +311,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf) /** * ice_dcb_sw_default_config - Apply a default DCB config - * @pf: pf to apply config to + * @pf: PF to apply config to + * @locked: was this function called with RTNL held */ -static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf) +static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) { struct ice_aqc_port_ets_elem buf = { 0 }; struct ice_dcbx_cfg *dcbcfg; @@ -338,7 +346,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf) dcbcfg->app[0].priority = 3; dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE; - ret = ice_pf_dcb_cfg(pf, dcbcfg); + ret = ice_pf_dcb_cfg(pf, dcbcfg, locked); devm_kfree(&pf->pdev->dev, dcbcfg); if (ret) return ret; @@ -348,9 +356,10 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf) /** * ice_init_pf_dcb - initialize DCB for a PF - * @pf: pf to initiialize DCB for + * @pf: PF to initialize DCB for + * @locked: Was function called with RTNL held */ -int ice_init_pf_dcb(struct ice_pf *pf) +int ice_init_pf_dcb(struct ice_pf *pf, bool locked) { struct device *dev = &pf->pdev->dev; struct ice_port_info *port_info; @@ -360,33 +369,10 @@ int ice_init_pf_dcb(struct ice_pf *pf) port_info = hw->port_info; - /* check if device is DCB capable */ - if (!hw->func_caps.common_cap.dcb) { - dev_dbg(dev, "DCB not supported\n"); - return -EOPNOTSUPP; - } - - /* Best effort to put DCBx and LLDP into a good state */ - port_info->dcbx_status = ice_get_dcbx_status(hw); - if (port_info->dcbx_status != ICE_DCBX_STATUS_DONE && - port_info->dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) { - bool dcbx_status; - - /* Attempt to start LLDP engine. Ignore errors - * as this will error if it is already started - */ - ice_aq_start_lldp(hw, NULL); - - /* Attempt to start DCBX. 
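(The block being removed above, which nudged LLDP and DCBX into a usable state from ice_init_pf_dcb(), is superseded by ice_init_dcb(); what the rework threads through every DCB (re)configuration path instead is the locked parameter.) The resulting conditional-locking idiom, as seen in ice_pf_dcb_cfg() earlier in this diff:

/* Callers already holding the RTNL pass locked = true so the DCB
 * paths do not take it a second time and deadlock.
 */
if (!locked)
	rtnl_lock();
ice_pf_dis_all_vsi(pf, true);
/* ... apply the new DCB configuration ... */
ice_pf_ena_all_vsi(pf, true);
if (!locked)
	rtnl_unlock();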
Ignore errors as this - * will error if it is already started - */ - ice_aq_start_stop_dcbx(hw, true, &dcbx_status, NULL); - } - err = ice_init_dcb(hw); if (err) { - /* FW LLDP not in usable state, default to SW DCBx/LLDP */ - dev_info(&pf->pdev->dev, "FW LLDP not in usable state\n"); + /* FW LLDP is not active, default to SW DCBX/LLDP */ + dev_info(&pf->pdev->dev, "FW LLDP is not active\n"); hw->port_info->dcbx_status = ICE_DCBX_STATUS_NOT_STARTED; hw->port_info->is_sw_lldp = true; } @@ -398,15 +384,16 @@ int ice_init_pf_dcb(struct ice_pf *pf) if (port_info->is_sw_lldp) { sw_default = 1; dev_info(&pf->pdev->dev, "DCBx/LLDP in SW mode.\n"); + clear_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags); + } else { + set_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags); } - if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) { - sw_default = 1; + if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) dev_info(&pf->pdev->dev, "DCBX not started\n"); - } if (sw_default) { - err = ice_dcb_sw_dflt_cfg(pf); + err = ice_dcb_sw_dflt_cfg(pf, locked); if (err) { dev_err(&pf->pdev->dev, "Failed to set local DCB config %d\n", err); @@ -425,7 +412,7 @@ int ice_init_pf_dcb(struct ice_pf *pf) set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); - err = ice_dcb_init_cfg(pf); + err = ice_dcb_init_cfg(pf, locked); if (err) goto dcb_init_err; @@ -515,6 +502,55 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, } /** + * ice_dcb_need_recfg - Check if DCB needs reconfig + * @pf: board private structure + * @old_cfg: current DCB config + * @new_cfg: new DCB config + */ +static bool ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg) +{ + bool need_reconfig = false; + + /* Check if ETS configuration has changed */ + if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg, + sizeof(new_cfg->etscfg))) { + /* If Priority Table has changed reconfig is needed */ + if (memcmp(&new_cfg->etscfg.prio_table, + &old_cfg->etscfg.prio_table, + sizeof(new_cfg->etscfg.prio_table))) { + need_reconfig = true; + dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n"); + } + + if (memcmp(&new_cfg->etscfg.tcbwtable, + &old_cfg->etscfg.tcbwtable, + sizeof(new_cfg->etscfg.tcbwtable))) + dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n"); + + if (memcmp(&new_cfg->etscfg.tsatable, + &old_cfg->etscfg.tsatable, + sizeof(new_cfg->etscfg.tsatable))) + dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n"); + } + + /* Check if PFC configuration has changed */ + if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) { + need_reconfig = true; + dev_dbg(&pf->pdev->dev, "PFC config change detected.\n"); + } + + /* Check if APP Table has changed */ + if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) { + need_reconfig = true; + dev_dbg(&pf->pdev->dev, "APP Table change detected.\n"); + } + + dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig); + return need_reconfig; +} + +/** * ice_dcb_process_lldp_set_mib_change - Process MIB change * @pf: ptr to ice_pf * @event: pointer to the admin queue receive event @@ -523,29 +559,95 @@ void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { - if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) { - struct ice_dcbx_cfg *dcbcfg, *prev_cfg; - int err; - - prev_cfg = &pf->hw.port_info->local_dcbx_cfg; - dcbcfg = devm_kmemdup(&pf->pdev->dev, prev_cfg, - sizeof(*dcbcfg), GFP_KERNEL); - if (!dcbcfg) + struct ice_aqc_port_ets_elem buf = { 0 }; + struct ice_aqc_lldp_get_mib *mib; + struct ice_dcbx_cfg tmp_dcbx_cfg; + bool need_reconfig 
= false; + struct ice_port_info *pi; + u8 type; + int ret; + + /* Not DCB capable or capability disabled */ + if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))) + return; + + if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) { + dev_dbg(&pf->pdev->dev, + "MIB Change Event in HOST mode\n"); + return; + } + + pi = pf->hw.port_info; + mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw; + /* Ignore if event is not for Nearest Bridge */ + type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) & + ICE_AQ_LLDP_BRID_TYPE_M); + dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type); + if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID) + return; + + /* Check MIB Type and return if event for Remote MIB update */ + type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M; + dev_dbg(&pf->pdev->dev, + "LLDP event mib type %s\n", type ? "remote" : "local"); + if (type == ICE_AQ_LLDP_MIB_REMOTE) { + /* Update the remote cached instance and return */ + ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, + ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, + &pi->remote_dcbx_cfg); + if (ret) { + dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n"); return; + } + } - err = ice_lldp_to_dcb_cfg(event->msg_buf, dcbcfg); - if (!err) - ice_pf_dcb_cfg(pf, dcbcfg); + /* store the old configuration */ + tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg; - devm_kfree(&pf->pdev->dev, dcbcfg); + /* Reset the old DCBX configuration data */ + memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg)); - /* Get updated DCBx data from firmware */ - err = ice_get_dcb_cfg(pf->hw.port_info); - if (err) - dev_err(&pf->pdev->dev, - "Failed to get DCB config\n"); - } else { + /* Get updated DCBX data from firmware */ + ret = ice_get_dcb_cfg(pf->hw.port_info); + if (ret) { + dev_err(&pf->pdev->dev, "Failed to get DCB config\n"); + return; + } + + /* No change detected in DCBX configs */ + if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) { dev_dbg(&pf->pdev->dev, - "MIB Change Event in HOST mode\n"); + "No change detected in DCBX configuration.\n"); + return; + } + + need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg, + &pi->local_dcbx_cfg); + if (!need_reconfig) + return; + + /* Enable DCB tagging only when more than one TC */ + if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) { + dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n"); + set_bit(ICE_FLAG_DCB_ENA, pf->flags); + } else { + dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n"); + clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } + + rtnl_lock(); + ice_pf_dis_all_vsi(pf, true); + + ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); + if (ret) { + dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + rtnl_unlock(); + return; + } + + /* changes in configuration update VSI */ + ice_pf_dcb_recfg(pf); + + ice_pf_ena_all_vsi(pf, true); + rtnl_unlock(); } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index ca7b76faa03c..819081053ff5 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -14,7 +14,7 @@ void ice_dcb_rebuild(struct ice_pf *pf); u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi); -int ice_init_pf_dcb(struct ice_pf *pf); +int ice_init_pf_dcb(struct ice_pf *pf, bool locked); void ice_update_dcb_stats(struct ice_pf *pf); int ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, @@ -40,7 +40,8 @@ static inline u8 
ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) return 1; } -static inline int ice_init_pf_dcb(struct ice_pf *pf) +static inline int +ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked) { dev_dbg(&pf->pdev->dev, "DCB not supported\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 1341fde8d53f..52083a63dee6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -45,22 +45,40 @@ static int ice_q_stats_len(struct net_device *netdev) ICE_VSI_STATS_LEN + ice_q_stats_len(n)) static const struct ice_stats ice_gstrings_vsi_stats[] = { - ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast), ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast), - ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast), + ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast), ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast), - ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), + ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast), ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast), - ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes), + ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes), - ICE_VSI_STAT("rx_discards", eth_stats.rx_discards), - ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), - ICE_VSI_STAT("tx_linearize", tx_linearize), + ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes), + ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards), ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed), ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), + ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), + ICE_VSI_STAT("tx_linearize", tx_linearize), +}; + +enum ice_ethtool_test_id { + ICE_ETH_TEST_REG = 0, + ICE_ETH_TEST_EEPROM, + ICE_ETH_TEST_INTR, + ICE_ETH_TEST_LOOP, + ICE_ETH_TEST_LINK, }; +static const char ice_gstrings_test[][ETH_GSTRING_LEN] = { + "Register test (offline)", + "EEPROM test (offline)", + "Interrupt test (offline)", + "Loopback test (offline)", + "Link test (on/offline)", +}; + +#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN) + /* These PF_STATs might look like duplicates of some NETDEV_STATs, * but they aren't. This device is capable of supporting multiple * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual @@ -71,45 +89,45 @@ static const struct ice_stats ice_gstrings_vsi_stats[] = { * is queried on the base PF netdev. 
*/ static const struct ice_stats ice_gstrings_pf_stats[] = { - ICE_PF_STAT("port.tx_bytes", stats.eth.tx_bytes), - ICE_PF_STAT("port.rx_bytes", stats.eth.rx_bytes), - ICE_PF_STAT("port.tx_unicast", stats.eth.tx_unicast), - ICE_PF_STAT("port.rx_unicast", stats.eth.rx_unicast), - ICE_PF_STAT("port.tx_multicast", stats.eth.tx_multicast), - ICE_PF_STAT("port.rx_multicast", stats.eth.rx_multicast), - ICE_PF_STAT("port.tx_broadcast", stats.eth.tx_broadcast), - ICE_PF_STAT("port.rx_broadcast", stats.eth.rx_broadcast), - ICE_PF_STAT("port.tx_errors", stats.eth.tx_errors), - ICE_PF_STAT("port.tx_size_64", stats.tx_size_64), - ICE_PF_STAT("port.rx_size_64", stats.rx_size_64), - ICE_PF_STAT("port.tx_size_127", stats.tx_size_127), - ICE_PF_STAT("port.rx_size_127", stats.rx_size_127), - ICE_PF_STAT("port.tx_size_255", stats.tx_size_255), - ICE_PF_STAT("port.rx_size_255", stats.rx_size_255), - ICE_PF_STAT("port.tx_size_511", stats.tx_size_511), - ICE_PF_STAT("port.rx_size_511", stats.rx_size_511), - ICE_PF_STAT("port.tx_size_1023", stats.tx_size_1023), - ICE_PF_STAT("port.rx_size_1023", stats.rx_size_1023), - ICE_PF_STAT("port.tx_size_1522", stats.tx_size_1522), - ICE_PF_STAT("port.rx_size_1522", stats.rx_size_1522), - ICE_PF_STAT("port.tx_size_big", stats.tx_size_big), - ICE_PF_STAT("port.rx_size_big", stats.rx_size_big), - ICE_PF_STAT("port.link_xon_tx", stats.link_xon_tx), - ICE_PF_STAT("port.link_xon_rx", stats.link_xon_rx), - ICE_PF_STAT("port.link_xoff_tx", stats.link_xoff_tx), - ICE_PF_STAT("port.link_xoff_rx", stats.link_xoff_rx), - ICE_PF_STAT("port.tx_dropped_link_down", stats.tx_dropped_link_down), - ICE_PF_STAT("port.rx_undersize", stats.rx_undersize), - ICE_PF_STAT("port.rx_fragments", stats.rx_fragments), - ICE_PF_STAT("port.rx_oversize", stats.rx_oversize), - ICE_PF_STAT("port.rx_jabber", stats.rx_jabber), - ICE_PF_STAT("port.rx_csum_bad", hw_csum_rx_error), - ICE_PF_STAT("port.rx_length_errors", stats.rx_len_errors), - ICE_PF_STAT("port.rx_dropped", stats.eth.rx_discards), - ICE_PF_STAT("port.rx_crc_errors", stats.crc_errors), - ICE_PF_STAT("port.illegal_bytes", stats.illegal_bytes), - ICE_PF_STAT("port.mac_local_faults", stats.mac_local_faults), - ICE_PF_STAT("port.mac_remote_faults", stats.mac_remote_faults), + ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes), + ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes), + ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast), + ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast), + ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast), + ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast), + ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast), + ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast), + ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors), + ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64), + ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64), + ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127), + ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127), + ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255), + ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255), + ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511), + ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511), + ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023), + ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023), + ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522), + ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522), + ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big), + ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big), + ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx), + 
ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx), + ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx), + ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx), + ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down), + ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize), + ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments), + ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize), + ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber), + ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error), + ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors), + ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards), + ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors), + ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes), + ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults), + ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults), }; static const u32 ice_regs_dump_list[] = { @@ -120,6 +138,9 @@ static const u32 ice_regs_dump_list[] = { QINT_RQCTL(0), PFINT_OICR_ENA, QRX_ITR(0), + PF0INT_ITR_0(0), + PF0INT_ITR_1(0), + PF0INT_ITR_2(0), }; struct ice_priv_flag { @@ -134,7 +155,7 @@ struct ice_priv_flag { static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), - ICE_PRIV_FLAG("disable-fw-lldp", ICE_FLAG_DISABLE_FW_LLDP), + ICE_PRIV_FLAG("enable-fw-lldp", ICE_FLAG_ENABLE_FW_LLDP), }; #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) @@ -278,6 +299,571 @@ out: return ret; } +/** + * ice_active_vfs - check if there are any active VFs + * @pf: board private structure + * + * Returns true if an active VF is found, otherwise returns false + */ +static bool ice_active_vfs(struct ice_pf *pf) +{ + struct ice_vf *vf = pf->vf; + int i; + + for (i = 0; i < pf->num_alloc_vfs; i++, vf++) + if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + return true; + return false; +} + +/** + * ice_link_test - perform a link test on a given net_device + * @netdev: network interface device structure + * + * This function performs one of the self-tests required by ethtool. + * Returns 0 on success, non-zero on failure. + */ +static u64 ice_link_test(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + enum ice_status status; + bool link_up = false; + + netdev_info(netdev, "link test\n"); + status = ice_get_link_status(np->vsi->port_info, &link_up); + if (status) { + netdev_err(netdev, "link query error, status = %d\n", status); + return 1; + } + + if (!link_up) + return 2; + + return 0; +} + +/** + * ice_eeprom_test - perform an EEPROM test on a given net_device + * @netdev: network interface device structure + * + * This function performs one of the self-tests required by ethtool. + * Returns 0 on success, non-zero on failure. 
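For orientation, the u64 result slots that these ice_*_test() handlers fill in are reported to userspace through the standard ETHTOOL_TEST ioctl. The standalone sketch below is not part of this patch; it assumes only the stock ethtool uapi headers and the five-entry result array this driver registers (error handling abbreviated):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    int main(int argc, char **argv)
    {
            struct ethtool_test *test;
            struct ifreq ifr = { 0 };
            int fd, i, n = 5;       /* ice registers 5 self-tests */

            if (argc < 2)
                    return 1;
            fd = socket(AF_INET, SOCK_DGRAM, 0);
            test = calloc(1, sizeof(*test) + n * sizeof(test->data[0]));
            if (fd < 0 || !test)
                    return 1;
            test->cmd = ETHTOOL_TEST;
            test->flags = ETH_TEST_FL_OFFLINE;      /* request offline tests */
            strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
            ifr.ifr_data = (char *)test;
            if (ioctl(fd, SIOCETHTOOL, &ifr)) {
                    perror("ETHTOOL_TEST");
                    return 1;
            }
            for (i = 0; i < n; i++)     /* 0 means the test passed */
                    printf("test %d: %llu\n", i,
                           (unsigned long long)test->data[i]);
            return 0;
    }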
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	netdev_info(netdev, "EEPROM test\n");
+	return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5,
+		0x00000000, 0xFFFFFFFF
+	};
+	u32 val, orig_val;
+	int i;
+
+	orig_val = rd32(hw, reg);
+	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+		u32 pattern = patterns[i] & mask;
+
+		wr32(hw, reg, pattern);
+		val = rd32(hw, reg);
+		if (val == pattern)
+			continue;
+		dev_err(&pf->pdev->dev,
+			"%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+			, __func__, reg, pattern, val);
+		return 1;
+	}
+
+	wr32(hw, reg, orig_val);
+	val = rd32(hw, reg);
+	if (val != orig_val) {
+		dev_err(&pf->pdev->dev,
+			"%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+			, __func__, reg, orig_val, val);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_hw *hw = np->vsi->port_info->hw;
+	u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+		hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+	struct ice_diag_reg_test_info {
+		u32 address;
+		u32 mask;
+		u32 elem_num;
+		u32 elem_size;
+	} ice_reg_list[] = {
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+		 GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+		{GLINT_ITR(1, 0), 0x00000fff, int_elements,
+		 GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+		{GLINT_ITR(2, 0), 0x00000fff, int_elements,
+		 GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+		{GLINT_CTL, 0xffff0001, 1, 0}
+	};
+	int i;
+
+	netdev_dbg(netdev, "Register test\n");
+	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+		u32 j;
+
+		for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+			u32 mask = ice_reg_list[i].mask;
+			u32 reg = ice_reg_list[i].address +
+				  (j * ice_reg_list[i].elem_size);
+
+			/* bail on failure (non-zero return) */
+			if (ice_reg_pattern_test(hw, reg, mask))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
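The write-pattern/read-back/restore idea in ice_reg_pattern_test() generalizes to any read-write register. A minimal self-contained sketch, with a plain variable standing in for the register (rd32()/wr32() replaced by direct access, purely illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Simulated 32-bit register; a real test would go through MMIO. */
    static uint32_t fake_reg;

    static int reg_pattern_test(uint32_t *reg, uint32_t mask)
    {
            static const uint32_t patterns[] = {
                    0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
            };
            uint32_t orig = *reg;
            unsigned int i;

            for (i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
                    uint32_t pat = patterns[i] & mask;

                    *reg = pat;
                    if (*reg != pat)
                            return 1;       /* stuck or shorted bit */
            }
            *reg = orig;                    /* restore and re-check */
            return *reg != orig;
    }

    int main(void)
    {
            printf("pattern test: %s\n",
                   reg_pattern_test(&fake_reg, 0x00000fff) ? "FAIL" : "PASS");
            return 0;
    }

Writing only bits under the mask keeps reserved or read-only bits untouched, which is why each pattern is ANDed with the writable mask before the write.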
+ */ +static int ice_lbtest_prepare_rings(struct ice_vsi *vsi) +{ + int status; + + status = ice_vsi_setup_tx_rings(vsi); + if (status) + goto err_setup_tx_ring; + + status = ice_vsi_setup_rx_rings(vsi); + if (status) + goto err_setup_rx_ring; + + status = ice_vsi_cfg(vsi); + if (status) + goto err_setup_rx_ring; + + status = ice_vsi_start_rx_rings(vsi); + if (status) + goto err_start_rx_ring; + + return status; + +err_start_rx_ring: + ice_vsi_free_rx_rings(vsi); +err_setup_rx_ring: + ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); +err_setup_tx_ring: + ice_vsi_free_tx_rings(vsi); + + return status; +} + +/** + * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test + * @vsi: pointer to the VSI structure + * + * Function stops and frees VSI rings after a loopback test. + * Returns 0 on success, negative on failure. + */ +static int ice_lbtest_disable_rings(struct ice_vsi *vsi) +{ + int status; + + status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); + if (status) + netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n", + vsi->vsi_num, status); + + status = ice_vsi_stop_rx_rings(vsi); + if (status) + netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n", + vsi->vsi_num, status); + + ice_vsi_free_tx_rings(vsi); + ice_vsi_free_rx_rings(vsi); + + return status; +} + +/** + * ice_lbtest_create_frame - create test packet + * @pf: pointer to the PF structure + * @ret_data: allocated frame buffer + * @size: size of the packet data + * + * Function allocates a frame with a test pattern on specific offsets. + * Returns 0 on success, non-zero on failure. + */ +static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size) +{ + u8 *data; + + if (!pf) + return -EINVAL; + + data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* Since the ethernet test frame should always be at least + * 64 bytes long, fill some octets in the payload with test data. + */ + memset(data, 0xFF, size); + data[32] = 0xDE; + data[42] = 0xAD; + data[44] = 0xBE; + data[46] = 0xEF; + + *ret_data = data; + + return 0; +} + +/** + * ice_lbtest_check_frame - verify received loopback frame + * @frame: pointer to the raw packet data + * + * Function verifies received test frame with a pattern. + * Returns true if frame matches the pattern, false otherwise. + */ +static bool ice_lbtest_check_frame(u8 *frame) +{ + /* Validate bytes of a frame under offsets chosen earlier */ + if (frame[32] == 0xDE && + frame[42] == 0xAD && + frame[44] == 0xBE && + frame[46] == 0xEF && + frame[48] == 0xFF) + return true; + + return false; +} + +/** + * ice_diag_send - send test frames to the test ring + * @tx_ring: pointer to the transmit ring + * @data: pointer to the raw packet data + * @size: size of the packet to send + * + * Function sends loopback packets on a test Tx ring. 
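The create/check pair above recognizes a looped-back copy by marker bytes planted at fixed offsets in an all-0xFF payload. A standalone sketch of the same pairing (same offsets and values as the driver code, shown outside the kernel for clarity):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define LB_FRAME_SIZE 64

    /* Fill with 0xFF, then plant marker bytes at fixed offsets. */
    static void lb_create_frame(uint8_t *data, size_t size)
    {
            memset(data, 0xFF, size);
            data[32] = 0xDE;
            data[42] = 0xAD;
            data[44] = 0xBE;
            data[46] = 0xEF;
    }

    static bool lb_check_frame(const uint8_t *frame)
    {
            return frame[32] == 0xDE && frame[42] == 0xAD &&
                   frame[44] == 0xBE && frame[46] == 0xEF &&
                   frame[48] == 0xFF;   /* untouched filler must survive */
    }

    int main(void)
    {
            uint8_t frame[LB_FRAME_SIZE];

            lb_create_frame(frame, sizeof(frame));
            printf("frame %s\n", lb_check_frame(frame) ? "matches" : "corrupt");
            return 0;
    }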
+ */
+static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size)
+{
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+	u64 td_cmd;
+
+	tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(tx_ring->dev, dma))
+		return -EINVAL;
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+
+	/* These flags are required for a descriptor to be pushed out */
+	td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+	tx_desc->cmd_type_offset_bsz =
+		cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			    (td_cmd << ICE_TXD_QW1_CMD_S) |
+			    ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+			    ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			    ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+	tx_buf->next_to_watch = tx_desc;
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	tx_ring->next_to_use++;
+	if (tx_ring->next_to_use >= tx_ring->count)
+		tx_ring->next_to_use = 0;
+
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+	/* Wait until the packets get transmitted to the receive queue. */
+	usleep_range(1000, 2000);
+	dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verifies their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+{
+	struct ice_rx_buf *rx_buf;
+	int valid_frames, i;
+	u8 *received_buf;
+
+	valid_frames = 0;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		union ice_32b_rx_flex_desc *rx_desc;
+
+		rx_desc = ICE_RX_DESC(rx_ring, i);
+
+		if (!(rx_desc->wb.status_error0 &
+		    cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S) |
+				BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))
+			continue;
+
+		rx_buf = &rx_ring->rx_buf[i];
+		received_buf = page_address(rx_buf->page);
+
+		if (ice_lbtest_check_frame(received_buf))
+			valid_frames++;
+	}
+
+	return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
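The loopback test below returns a distinct small code for each failed stage and unwinds only the stages that already succeeded, the classic goto-cleanup shape. A compact model of that structure (the setup and teardown helpers are placeholders, not driver functions):

    #include <stdio.h>

    static int setup_rings(void)        { return 0; }
    static int alloc_bufs(void)         { return 0; }
    static int enable_loopback(void)    { return 0; }
    static void disable_loopback(void)  { }
    static void free_bufs(void)         { }
    static void free_rings(void)        { }

    /* Staged bring-up with numbered failure codes and goto unwinding,
     * the same shape as ice_loopback_test(); every stage that succeeded
     * is torn down in reverse order on any later failure.
     */
    static unsigned long long loopback_test(void)
    {
            unsigned long long ret = 0;

            if (setup_rings())     { ret = 2; goto out; }
            if (alloc_bufs())      { ret = 3; goto undo_rings; }
            if (enable_loopback()) { ret = 4; goto undo_bufs; }

            /* send and verify frames here, setting ret = 8..10 on error */

            disable_loopback();
    undo_bufs:
            free_bufs();
    undo_rings:
            free_rings();
    out:
            return ret;
    }

    int main(void)
    {
            printf("loopback test: %llu\n", loopback_test());
            return 0;
    }

The numbered codes make a failing run self-describing: the ethtool result word directly identifies which stage broke.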
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+	struct ice_pf *pf = orig_vsi->back;
+	struct ice_ring *tx_ring, *rx_ring;
+	u8 broadcast[ETH_ALEN], ret = 0;
+	int num_frames, valid_frames;
+	LIST_HEAD(tmp_list);
+	u8 *tx_frame;
+	int i;
+
+	netdev_info(netdev, "loopback test\n");
+
+	test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+	if (!test_vsi) {
+		netdev_err(netdev, "Failed to create a VSI for the loopback test\n");
+		return 1;
+	}
+
+	test_vsi->netdev = netdev;
+	tx_ring = test_vsi->tx_rings[0];
+	rx_ring = test_vsi->rx_rings[0];
+
+	if (ice_lbtest_prepare_rings(test_vsi)) {
+		ret = 2;
+		goto lbtest_vsi_close;
+	}
+
+	if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+		ret = 3;
+		goto lbtest_rings_dis;
+	}
+
+	/* Enable MAC loopback in firmware */
+	if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+		ret = 4;
+		goto lbtest_mac_dis;
+	}
+
+	/* Test VSI needs to receive broadcast packets */
+	eth_broadcast_addr(broadcast);
+	if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) {
+		ret = 5;
+		goto lbtest_mac_dis;
+	}
+
+	if (ice_add_mac(&pf->hw, &tmp_list)) {
+		ret = 6;
+		goto free_mac_list;
+	}
+
+	if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+		ret = 7;
+		goto remove_mac_filters;
+	}
+
+	num_frames = min_t(int, tx_ring->count, 32);
+	for (i = 0; i < num_frames; i++) {
+		if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+			ret = 8;
+			goto lbtest_free_frame;
+		}
+	}
+
+	valid_frames = ice_lbtest_receive_frames(rx_ring);
+	if (!valid_frames)
+		ret = 9;
+	else if (valid_frames != num_frames)
+		ret = 10;
+
+lbtest_free_frame:
+	devm_kfree(&pf->pdev->dev, tx_frame);
+remove_mac_filters:
+	if (ice_remove_mac(&pf->hw, &tmp_list))
+		netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
+free_mac_list:
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+lbtest_mac_dis:
+	/* Disable MAC loopback after the test is completed. */
+	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+		netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+	if (ice_lbtest_disable_rings(test_vsi))
+		netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+	test_vsi->netdev = NULL;
+	if (ice_vsi_release(test_vsi))
+		netdev_err(netdev, "Failed to remove the test VSI\n");
+
+	return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	u16 swic_old = pf->sw_int_count;
+
+	netdev_info(netdev, "interrupt test\n");
+
+	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M |
+	     GLINT_DYN_CTL_INTENA_MSK_M |
+	     GLINT_DYN_CTL_SWINT_TRIG_M);
+
+	usleep_range(1000, 2000);
+	return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
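The interrupt test above is a snapshot-fire-compare pattern: record the software-interrupt counter, trigger a software interrupt via GLINT_DYN_CTL, give the IRQ time to land, and fail if the counter did not advance. A generic sketch of that pattern (the trigger and handler here are stand-ins, not driver code):

    #include <stdio.h>

    static volatile unsigned int sw_int_count;

    /* Stand-in for the OICR interrupt handler bumping its counter. */
    static void fake_irq_handler(void) { sw_int_count++; }

    /* Stand-in for the GLINT_DYN_CTL software-interrupt trigger write. */
    static void trigger_sw_interrupt(void) { fake_irq_handler(); }

    /* Same pass/fail rule as ice_intr_test(): snapshot the counter,
     * fire, wait, and return non-zero if the count did not move.
     */
    static unsigned long long intr_test(void)
    {
            unsigned int old = sw_int_count;

            trigger_sw_interrupt();
            /* a real driver sleeps here (usleep_range) to let the IRQ land */
            return old == sw_int_count;
    }

    int main(void)
    {
            printf("interrupt test: %llu\n", intr_test());
            return 0;
    }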
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+	      u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(netdev, "offline testing starting\n");
+
+		set_bit(__ICE_TESTING, pf->state);
+
+		if (ice_active_vfs(pf)) {
+			dev_warn(&pf->pdev->dev,
+				 "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+			data[ICE_ETH_TEST_REG] = 1;
+			data[ICE_ETH_TEST_EEPROM] = 1;
+			data[ICE_ETH_TEST_INTR] = 1;
+			data[ICE_ETH_TEST_LOOP] = 1;
+			data[ICE_ETH_TEST_LINK] = 1;
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+			clear_bit(__ICE_TESTING, pf->state);
+			goto skip_ol_tests;
+		}
+		/* If the device is online then take it offline */
+		if (if_running)
+			/* indicate we're in test mode */
+			ice_stop(netdev);
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+		data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+		data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+		data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+		if (data[ICE_ETH_TEST_LINK] ||
+		    data[ICE_ETH_TEST_EEPROM] ||
+		    data[ICE_ETH_TEST_LOOP] ||
+		    data[ICE_ETH_TEST_INTR] ||
+		    data[ICE_ETH_TEST_REG])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(__ICE_TESTING, pf->state);
+
+		if (if_running) {
+			int status = ice_open(netdev);
+
+			if (status) {
+				dev_err(&pf->pdev->dev,
+					"Could not open device %s, err %d\n",
+					pf->int_name, status);
+			}
+		}
+	} else {
+		/* Online tests */
+		netdev_info(netdev, "online testing starting\n");
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		if (data[ICE_ETH_TEST_LINK])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline only tests, not run in online; pass by default */
+		data[ICE_ETH_TEST_REG] = 0;
+		data[ICE_ETH_TEST_EEPROM] = 0;
+		data[ICE_ETH_TEST_INTR] = 0;
+		data[ICE_ETH_TEST_LOOP] = 0;
+	}
+
+skip_ol_tests:
+	netdev_info(netdev, "testing finished\n");
+}
+
 static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
@@ -295,17 +881,17 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 		ice_for_each_alloc_txq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "tx-queue-%u.tx_packets", i);
+				 "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
 		ice_for_each_alloc_rxq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "rx-queue-%u.rx_packets", i);
+				 "rx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
@@ -320,21 +906,24 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx-priority-%u-xon", i);
+				 "tx_priority_%u_xon.nic", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx-priority-%u-xoff", i);
+				 "tx_priority_%u_xoff.nic", i);
 			p += ETH_GSTRING_LEN;
 		}
 
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx-priority-%u-xon", i);
+				 "rx_priority_%u_xon.nic", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx-priority-%u-xoff", i);
+				 "rx_priority_%u_xoff.nic", i);
 			p += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_TEST:
+		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
 	case ETH_SS_PRIV_FLAGS:
 		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
 			snprintf(p, ETH_GSTRING_LEN, "%s",
@@ -371,6 +960,185 @@ ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
 }
 
 /**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_set_phy_cfg_data config = { 0 };
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_vsi *vsi = np->vsi;
+	u8 sw_cfg_caps, sw_cfg_fec;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+	if (!pi)
+		return -EOPNOTSUPP;
+
+	/* Changing the FEC parameters is not supported if not the PF VSI */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Get last SW configuration */
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Copy SW configuration returned from PHY caps to PHY config */
+	ice_copy_phy_caps_to_cfg(caps, &config);
+	sw_cfg_caps = caps->caps;
+	sw_cfg_fec = caps->link_fec_options;
+
+	/* Get topology caps, then copy PHY FEC topology caps to PHY config */
+	memset(caps, 0, sizeof(*caps));
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	config.caps |= (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC);
+	config.link_fec_opt = caps->link_fec_options;
+
+	ice_cfg_phy_fec(&config, req_fec);
+
+	/* If FEC mode has changed, then set PHY configuration and enable AN.
*/ + if ((config.caps & ICE_AQ_PHY_ENA_AUTO_FEC) != + (sw_cfg_caps & ICE_AQC_PHY_EN_AUTO_FEC) || + config.link_fec_opt != sw_cfg_fec) { + if (caps->caps & ICE_AQC_PHY_AN_MODE) + config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; + + status = ice_aq_set_phy_cfg(pi->hw, pi->lport, &config, NULL); + + if (status) + err = -EAGAIN; + } + +done: + devm_kfree(&vsi->back->pdev->dev, caps); + return err; +} + +/** + * ice_set_fecparam - Set FEC link options + * @netdev: network interface device structure + * @fecparam: Ethtool structure to retrieve FEC parameters + */ +static int +ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + enum ice_fec_mode fec; + + switch (fecparam->fec) { + case ETHTOOL_FEC_AUTO: + fec = ICE_FEC_AUTO; + break; + case ETHTOOL_FEC_RS: + fec = ICE_FEC_RS; + break; + case ETHTOOL_FEC_BASER: + fec = ICE_FEC_BASER; + break; + case ETHTOOL_FEC_OFF: + case ETHTOOL_FEC_NONE: + fec = ICE_FEC_NONE; + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n", + fecparam->fec); + return -EINVAL; + } + + return ice_set_fec_cfg(netdev, fec); +} + +/** + * ice_get_fecparam - Get link FEC options + * @netdev: network interface device structure + * @fecparam: Ethtool structure to retrieve FEC parameters + */ +static int +ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_aqc_get_phy_caps_data *caps; + struct ice_link_status *link_info; + struct ice_vsi *vsi = np->vsi; + struct ice_port_info *pi; + enum ice_status status; + int err = 0; + + pi = vsi->port_info; + + if (!pi) + return -EOPNOTSUPP; + link_info = &pi->phy.link_info; + + /* Set FEC mode based on negotiated link info */ + switch (link_info->fec_info) { + case ICE_AQ_LINK_25G_KR_FEC_EN: + fecparam->active_fec = ETHTOOL_FEC_BASER; + break; + case ICE_AQ_LINK_25G_RS_528_FEC_EN: + /* fall through */ + case ICE_AQ_LINK_25G_RS_544_FEC_EN: + fecparam->active_fec = ETHTOOL_FEC_RS; + break; + default: + fecparam->active_fec = ETHTOOL_FEC_OFF; + break; + } + + caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + if (!caps) + return -ENOMEM; + + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + caps, NULL); + if (status) { + err = -EAGAIN; + goto done; + } + + /* Set supported/configured FEC modes based on PHY capability */ + if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC) + fecparam->fec |= ETHTOOL_FEC_AUTO; + if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN || + caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ) + fecparam->fec |= ETHTOOL_FEC_BASER; + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN) + fecparam->fec |= ETHTOOL_FEC_RS; + if (caps->link_fec_options == 0) + fecparam->fec |= ETHTOOL_FEC_OFF; + +done: + devm_kfree(&vsi->back->pdev->dev, caps); + return err; +} + +/** * ice_get_priv_flags - report device private flags * @netdev: network interface device structure * @@ -433,10 +1201,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS); - if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, change_flags)) { - if 
(test_bit(ICE_FLAG_DISABLE_FW_LLDP, pf->flags)) { + if (test_bit(ICE_FLAG_ENABLE_FW_LLDP, change_flags)) { + if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) { enum ice_status status; + /* Disable FW LLDP engine */ status = ice_aq_cfg_lldp_mib_change(&pf->hw, false, NULL); /* If unregistering for LLDP events fails, this is @@ -450,7 +1219,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) /* The AQ call to stop the FW LLDP agent will generate * an error if the agent is already stopped. */ - status = ice_aq_stop_lldp(&pf->hw, true, NULL); + status = ice_aq_stop_lldp(&pf->hw, true, true, NULL); if (status) dev_warn(&pf->pdev->dev, "Fail to stop LLDP agent\n"); @@ -458,9 +1227,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) * will likely not need DCB, so failure to init is * not a concern of ethtool */ - status = ice_init_pf_dcb(pf); + status = ice_init_pf_dcb(pf, true); if (status) dev_warn(&pf->pdev->dev, "Fail to init DCB\n"); + + /* Forward LLDP packets to default VSI so that they + * are passed up the stack + */ + ice_cfg_sw_lldp(vsi, false, true); } else { enum ice_status status; bool dcbx_agent_status; @@ -468,12 +1242,12 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) /* AQ command to start FW LLDP agent will return an * error if the agent is already started */ - status = ice_aq_start_lldp(&pf->hw, NULL); + status = ice_aq_start_lldp(&pf->hw, true, NULL); if (status) dev_warn(&pf->pdev->dev, "Fail to start LLDP Agent\n"); - /* AQ command to start FW DCBx agent will fail if + /* AQ command to start FW DCBX agent will fail if * the agent is already started */ status = ice_aq_start_stop_dcbx(&pf->hw, true, @@ -491,15 +1265,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) * registration/init failed but do not return error * state to ethtool */ - status = ice_aq_cfg_lldp_mib_change(&pf->hw, false, - NULL); - if (status) - dev_dbg(&pf->pdev->dev, - "Fail to reg for MIB change\n"); - - status = ice_init_pf_dcb(pf); + status = ice_init_pf_dcb(pf, true); if (status) dev_dbg(&pf->pdev->dev, "Fail to init DCB\n"); + + /* Remove rule to direct LLDP packets to default VSI. + * The FW LLDP engine will now be consuming them. + */ + ice_cfg_sw_lldp(vsi, false, false); } } clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); @@ -529,6 +1302,8 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) * not safe. 
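The flag handling above rests on the bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS) call: XOR of the old and new flag bitmaps yields exactly the bits that toggled. A minimal sketch of that change-detection idiom using plain words (the flag names here are illustrative):

    #include <stdio.h>

    /* Private-flag change detection in miniature: XOR of old and new
     * flag words yields exactly the bits that toggled.
     */
    enum { FLAG_LINK_DOWN_ON_CLOSE, FLAG_ENABLE_FW_LLDP, NUM_FLAGS };

    int main(void)
    {
            unsigned long orig  = 1UL << FLAG_LINK_DOWN_ON_CLOSE;
            unsigned long flags = orig | (1UL << FLAG_ENABLE_FW_LLDP);
            unsigned long changed = flags ^ orig;

            if (changed & (1UL << FLAG_ENABLE_FW_LLDP))
                    printf("fw-lldp flag toggled to %s\n",
                           (flags & (1UL << FLAG_ENABLE_FW_LLDP)) ? "on" : "off");
            return 0;
    }

Testing the XOR result first means untouched flags cost nothing; only the toggled bit drives the LLDP start/stop paths that follow.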
*/ return ICE_ALL_STATS_LEN(netdev); + case ETH_SS_TEST: + return ICE_TEST_LEN; case ETH_SS_PRIV_FLAGS: return ICE_PRIV_FLAG_ARRAY_SIZE; default: @@ -628,7 +1403,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_100M_SGMII) { ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 100baseT_Full); } @@ -636,14 +1412,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_1G_SGMII) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseT_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseKX_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseKX_Full); } @@ -651,14 +1429,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_LX) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseX_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseX_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T) { ethtool_link_ksettings_add_link_mode(ks, supported, 2500baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 2500baseT_Full); } @@ -666,7 +1446,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX) { ethtool_link_ksettings_add_link_mode(ks, supported, 2500baseX_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 2500baseX_Full); } @@ -674,7 +1455,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR) { ethtool_link_ksettings_add_link_mode(ks, supported, 5000baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 5000baseT_Full); } @@ -684,28 +1466,32 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_C2C) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseT_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseKR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & 
ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseKR_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_SR) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseSR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseSR_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_LR) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseLR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseLR_Full); } @@ -717,7 +1503,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_C2C) { ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseCR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseCR_Full); } @@ -725,7 +1512,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_LR) { ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseSR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseSR_Full); } @@ -734,14 +1522,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1) { ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseKR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseKR_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseKR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseKR4_Full); } @@ -750,21 +1540,24 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseCR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseCR4_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_SR4) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseSR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseSR4_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_LR4) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseLR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseLR4_Full); } @@ -779,7 +1572,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & 
ICE_PHY_TYPE_LOW_50G_AUI1) { ethtool_link_ksettings_add_link_mode(ks, supported, 50000baseCR2_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 50000baseCR2_Full); } @@ -787,7 +1581,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) { ethtool_link_ksettings_add_link_mode(ks, supported, 50000baseKR2_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 50000baseKR2_Full); } @@ -797,7 +1592,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) { ethtool_link_ksettings_add_link_mode(ks, supported, 50000baseSR2_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 50000baseSR2_Full); } @@ -814,7 +1610,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseCR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) { @@ -826,7 +1623,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseSR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) { @@ -838,7 +1636,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseLR4_ER4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) { @@ -851,7 +1650,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseKR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) @@ -1275,6 +2075,7 @@ ice_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ks) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_aqc_get_phy_caps_data *caps; struct ice_link_status *hw_link_info; struct ice_vsi *vsi = np->vsi; @@ -1345,6 +2146,40 @@ ice_get_link_ksettings(struct net_device *netdev, break; } + caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + if (!caps) + goto done; + + if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_TOPO_CAP, + caps, NULL)) + netdev_info(netdev, "Get phy capability failed.\n"); + + /* Set supported FEC modes based on PHY capability */ + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + + if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN || + caps->link_fec_options & 
ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN) + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN) + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + + if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_SW_CFG, + caps, NULL)) + netdev_info(netdev, "Get phy capability failed.\n"); + + /* Set advertised FEC modes based on PHY capability */ + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); + + if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ) + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ) + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + +done: + devm_kfree(&vsi->back->pdev->dev, caps); return 0; } @@ -2371,8 +3206,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - wr32(&pf->hw, GLINT_RATE(vsi->hw_base_vector + - rc->ring->q_vector->v_idx), + wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx), ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high, pf->hw.intrl_gran)); } @@ -2533,6 +3367,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_regs = ice_get_regs, .get_msglevel = ice_get_msglevel, .set_msglevel = ice_set_msglevel, + .self_test = ice_self_test, .get_link = ethtool_op_get_link, .get_eeprom_len = ice_get_eeprom_len, .get_eeprom = ice_get_eeprom, @@ -2557,6 +3392,8 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_per_queue_coalesce = ice_get_per_q_coalesce, .set_per_queue_coalesce = ice_set_per_q_coalesce, + .get_fecparam = ice_get_fecparam, + .set_fecparam = ice_set_fecparam, }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index ec25f26069b0..6c5ce05742b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -6,6 +6,9 @@ #ifndef _ICE_HW_AUTOGEN_H_ #define _ICE_HW_AUTOGEN_H_ +#define PF0INT_ITR_0(_i) (0x03000004 + ((_i) * 4096)) +#define PF0INT_ITR_1(_i) (0x03000008 + ((_i) * 4096)) +#define PF0INT_ITR_2(_i) (0x0300000C + ((_i) * 4096)) #define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD(_DBQM) (0x000E0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD_HEAD_S 0 @@ -155,6 +158,7 @@ #define PFINT_OICR_HMC_ERR_M BIT(26) #define PFINT_OICR_PE_CRITERR_M BIT(28) #define PFINT_OICR_VFLR_M BIT(29) +#define PFINT_OICR_SWINT_M BIT(31) #define PFINT_OICR_CTL 0x0016CA80 #define PFINT_OICR_CTL_MSIX_INDX_M ICE_M(0x7FF, 0) #define PFINT_OICR_CTL_ITR_INDX_S 11 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index fbf1eba0cc2a..a19f5920733b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -137,6 +137,8 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) * for PF or EMP this field should be set to zero */ switch (vsi->type) { + case ICE_VSI_LB: + /* fall through */ case ICE_VSI_PF: tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; @@ -251,6 +253,10 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) if (!vsi->rx_rings) goto err_rxrings; + /* There is no 
need to allocate q_vectors for a loopback VSI. */ + if (vsi->type == ICE_VSI_LB) + return 0; + /* allocate memory for q_vector pointers */ vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors, sizeof(*vsi->q_vectors), GFP_KERNEL); @@ -275,6 +281,8 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) { switch (vsi->type) { case ICE_VSI_PF: + /* fall through */ + case ICE_VSI_LB: vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC; vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC; break; @@ -313,10 +321,14 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = vf->num_vf_qs; /* pf->num_vf_msix includes (VF miscellaneous vector + * data queue interrupts). Since vsi->num_q_vectors is number - * of queues vectors, subtract 1 from the original vector - * count + * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the + * original vector count */ - vsi->num_q_vectors = pf->num_vf_msix - 1; + vsi->num_q_vectors = pf->num_vf_msix - ICE_NONQ_VECS_VF; + break; + case ICE_VSI_LB: + vsi->alloc_txq = 1; + vsi->alloc_rxq = 1; break; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); @@ -516,6 +528,10 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) if (ice_vsi_alloc_arrays(vsi)) goto err_rings; break; + case ICE_VSI_LB: + if (ice_vsi_alloc_arrays(vsi)) + goto err_rings; + break; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); goto unlock_pf; @@ -732,6 +748,8 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) BIT(cap->rss_table_entry_width)); vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI; break; + case ICE_VSI_LB: + break; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); @@ -924,6 +942,9 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; + case ICE_VSI_LB: + dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type); + return; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); return; @@ -955,6 +976,8 @@ static int ice_vsi_init(struct ice_vsi *vsi) ctxt->info = vsi->info; switch (vsi->type) { + case ICE_VSI_LB: + /* fall through */ case ICE_VSI_PF: ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; @@ -1145,61 +1168,32 @@ err_out: static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - int num_q_vectors = 0; + u16 num_q_vectors; + + /* SRIOV doesn't grab irq_tracker entries for each VSI */ + if (vsi->type == ICE_VSI_VF) + return 0; - if (vsi->sw_base_vector || vsi->hw_base_vector) { - dev_dbg(&pf->pdev->dev, "VSI %d has non-zero HW base vector %d or SW base vector %d\n", - vsi->vsi_num, vsi->hw_base_vector, vsi->sw_base_vector); + if (vsi->base_vector) { + dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", + vsi->vsi_num, vsi->base_vector); return -EEXIST; } if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) return -ENOENT; - switch (vsi->type) { - case ICE_VSI_PF: - num_q_vectors = vsi->num_q_vectors; - /* reserve slots from OS requested IRQs */ - vsi->sw_base_vector = ice_get_res(pf, pf->sw_irq_tracker, - num_q_vectors, vsi->idx); - if (vsi->sw_base_vector < 0) { - dev_err(&pf->pdev->dev, - "Failed to get tracking for %d SW vectors for VSI %d, err=%d\n", - num_q_vectors, vsi->vsi_num, - vsi->sw_base_vector); - return -ENOENT; - } - pf->num_avail_sw_msix -= num_q_vectors; - - /* reserve slots from HW interrupts */ - vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker, - num_q_vectors, vsi->idx); 
- break; - case ICE_VSI_VF: - /* take VF misc vector and data vectors into account */ - num_q_vectors = pf->num_vf_msix; - /* For VF VSI, reserve slots only from HW interrupts */ - vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker, - num_q_vectors, vsi->idx); - break; - default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); - break; - } - - if (vsi->hw_base_vector < 0) { + num_q_vectors = vsi->num_q_vectors; + /* reserve slots from OS requested IRQs */ + vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + vsi->idx); + if (vsi->base_vector < 0) { dev_err(&pf->pdev->dev, - "Failed to get tracking for %d HW vectors for VSI %d, err=%d\n", - num_q_vectors, vsi->vsi_num, vsi->hw_base_vector); - if (vsi->type != ICE_VSI_VF) { - ice_free_res(pf->sw_irq_tracker, - vsi->sw_base_vector, vsi->idx); - pf->num_avail_sw_msix += num_q_vectors; - } + "Failed to get tracking for %d vectors for VSI %d, err=%d\n", + num_q_vectors, vsi->vsi_num, vsi->base_vector); return -ENOENT; } - - pf->num_avail_hw_msix -= num_q_vectors; + pf->num_avail_sw_msix -= num_q_vectors; return 0; } @@ -1842,8 +1836,73 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) } /** + * ice_cfg_txq_interrupt - configure interrupt on Tx queue + * @vsi: the VSI being configured + * @txq: Tx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector + * within the function space. + */ +#ifdef CONFIG_PCI_IOV +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) +#else +static void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) +#endif /* CONFIG_PCI_IOV */ +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M; + + val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M); + + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); +} + +/** + * ice_cfg_rxq_interrupt - configure interrupt on Rx queue + * @vsi: the VSI being configured + * @rxq: Rx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector + * within the function space. + */ +#ifdef CONFIG_PCI_IOV +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) +#else +static void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) +#endif /* CONFIG_PCI_IOV */ +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M; + + val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M); + + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + + ice_flush(hw); +} + +/** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured + * + * This configures MSIX mode interrupts for the PF VSI, and should not be used + * for the VF VSI. */ void ice_vsi_cfg_msix(struct ice_vsi *vsi) { @@ -1873,43 +1932,17 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) * tracked for this PF. 
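The two helpers above build a QINT_TQCTL/QINT_RQCTL value by ORing a cause-enable bit with the shifted-and-masked MSI-X index and ITR index. The sketch below shows only the packing idiom; the shift and mask values are placeholders, not the real ice_hw_autogen.h definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative field layout only; the real QINT_TQCTL_* shifts and
     * masks live in ice_hw_autogen.h, these numbers are placeholders.
     */
    #define QINT_CAUSE_ENA   (1u << 30)
    #define QINT_ITR_INDX_S  11
    #define QINT_ITR_INDX_M  (0x3u << QINT_ITR_INDX_S)
    #define QINT_MSIX_INDX_S 0
    #define QINT_MSIX_INDX_M (0x7FFu << QINT_MSIX_INDX_S)

    /* Same packing as ice_cfg_txq_interrupt(): enable the cause and
     * encode which vector and which ITR bank service this queue.
     */
    static uint32_t pack_qint(uint16_t msix_idx, uint16_t itr_idx)
    {
            uint32_t itr = ((uint32_t)itr_idx << QINT_ITR_INDX_S) &
                           QINT_ITR_INDX_M;
            uint32_t msix = ((uint32_t)msix_idx << QINT_MSIX_INDX_S) &
                            QINT_MSIX_INDX_M;

            return QINT_CAUSE_ENA | itr | msix;
    }

    int main(void)
    {
            printf("QINT value: 0x%08x\n", pack_qint(5, 1));
            return 0;
    }

Masking after the shift keeps an out-of-range index from corrupting neighboring fields, which is why both helpers apply the mask even for values the caller already validated.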
*/ for (q = 0; q < q_vector->num_ring_tx; q++) { - int itr_idx = (q_vector->tx.itr_idx << - QINT_TQCTL_ITR_INDX_S) & - QINT_TQCTL_ITR_INDX_M; - u32 val; - - if (vsi->type == ICE_VSI_VF) - val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | - (((i + 1) << QINT_TQCTL_MSIX_INDX_S) & - QINT_TQCTL_MSIX_INDX_M); - else - val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | - ((reg_idx << QINT_TQCTL_MSIX_INDX_S) & - QINT_TQCTL_MSIX_INDX_M); - wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + ice_cfg_txq_interrupt(vsi, txq, reg_idx, + q_vector->tx.itr_idx); txq++; } for (q = 0; q < q_vector->num_ring_rx; q++) { - int itr_idx = (q_vector->rx.itr_idx << - QINT_RQCTL_ITR_INDX_S) & - QINT_RQCTL_ITR_INDX_M; - u32 val; - - if (vsi->type == ICE_VSI_VF) - val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | - (((i + 1) << QINT_RQCTL_MSIX_INDX_S) & - QINT_RQCTL_MSIX_INDX_M); - else - val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | - ((reg_idx << QINT_RQCTL_MSIX_INDX_S) & - QINT_RQCTL_MSIX_INDX_M); - wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + ice_cfg_rxq_interrupt(vsi, rxq, reg_idx, + q_vector->rx.itr_idx); rxq++; } } - - ice_flush(hw); } /** @@ -2024,6 +2057,19 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) } /** + * ice_trigger_sw_intr - trigger a software interrupt + * @hw: pointer to the HW structure + * @q_vector: interrupt vector to trigger the software interrupt for + */ +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) +{ + wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), + (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) | + GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_INTENA_M); +} + +/** * ice_vsi_stop_tx_rings - Disable Tx rings * @vsi: the VSI being configured * @rst_src: reset source @@ -2070,8 +2116,9 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, break; for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { - if (!rings || !rings[q_idx] || - !rings[q_idx]->q_vector) { + struct ice_q_vector *q_vector; + + if (!rings || !rings[q_idx]) { err = -EINVAL; goto err_out; } @@ -2091,9 +2138,10 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, /* trigger a software interrupt for the vector * associated to the queue to schedule NAPI handler */ - wr32(hw, GLINT_DYN_CTL(rings[i]->q_vector->reg_idx), - GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_INTENA_MSK_M); + q_vector = rings[i]->q_vector; + if (q_vector) + ice_trigger_sw_intr(hw, q_vector); + q_idx++; } status = ice_dis_vsi_txq(vsi->port_info, vsi->idx, tc, @@ -2234,7 +2282,14 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) goto clear_reg_idx; } - q_vector->reg_idx = q_vector->v_idx + vsi->hw_base_vector; + if (vsi->type == ICE_VSI_VF) { + struct ice_vf *vf = &vsi->back->vf[vsi->vf_id]; + + q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector); + } else { + q_vector->reg_idx = + q_vector->v_idx + vsi->base_vector; + } } return 0; @@ -2291,6 +2346,54 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) } /** + * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling + * @vsi: the VSI being configured + * @tx: bool to determine Tx or Rx rule + * @create: bool to determine create or remove Rule + */ +void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) +{ + struct ice_fltr_list_entry *list; + struct ice_pf *pf = vsi->back; + LIST_HEAD(tmp_add_list); + enum ice_status status; + + list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + if (!list) + return; + + list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE; + list->fltr_info.vsi_handle = vsi->idx; + 
list->fltr_info.l_data.ethertype_mac.ethertype = ETH_P_LLDP; + + if (tx) { + list->fltr_info.fltr_act = ICE_DROP_PACKET; + list->fltr_info.flag = ICE_FLTR_TX; + list->fltr_info.src_id = ICE_SRC_ID_VSI; + } else { + list->fltr_info.fltr_act = ICE_FWD_TO_VSI; + list->fltr_info.flag = ICE_FLTR_RX; + list->fltr_info.src_id = ICE_SRC_ID_LPORT; + } + + INIT_LIST_HEAD(&list->list_entry); + list_add(&list->list_entry, &tmp_add_list); + + if (create) + status = ice_add_eth_mac(&pf->hw, &tmp_add_list); + else + status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); + + if (status) + dev_err(&pf->pdev->dev, + "Fail %s %s LLDP rule on VSI %i error: %d\n", + create ? "adding" : "removing", tx ? "TX" : "RX", + vsi->vsi_num, status); + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); +} + +/** * ice_vsi_setup - Set up a VSI by a given type * @pf: board private structure * @pi: pointer to the port_info instance @@ -2310,6 +2413,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = &pf->pdev->dev; + enum ice_status status; struct ice_vsi *vsi; int ret, i; @@ -2389,23 +2493,24 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_alloc_q_vector; - /* Setup Vector base only during VF init phase or when VF asks - * for more vectors than assigned number. In all other cases, - * assign hw_base_vector to the value given earlier. - */ - if (test_bit(ICE_VF_STATE_CFG_INTR, pf->vf[vf_id].vf_states)) { - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto unroll_vector_base; - } else { - vsi->hw_base_vector = pf->vf[vf_id].first_vector_idx; - } ret = ice_vsi_set_q_vectors_reg_idx(vsi); if (ret) goto unroll_vector_base; pf->q_left_tx -= vsi->alloc_txq; pf->q_left_rx -= vsi->alloc_rxq; + + /* Do not exit if configuring RSS had an issue, at least + * receive traffic on first queue. Hence no need to capture + * return value + */ + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_vsi_cfg_rss_lut_key(vsi); + break; + case ICE_VSI_LB: + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto unroll_vsi_init; break; default: /* clean up the resources and exit */ @@ -2416,12 +2521,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, for (i = 0; i < vsi->tc_cfg.numtc; i++) max_txqs[i] = pf->num_lan_tx; - ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); - if (ret) { + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); + if (status) { dev_err(&pf->pdev->dev, "VSI %d failed lan queue config, error %d\n", - vsi->vsi_num, ret); + vsi->vsi_num, status); goto unroll_vector_base; } @@ -2430,19 +2535,28 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, * out PAUSE or PFC frames. If enabled, FW can still send FC frames. * The rule is added once for PF VSI in order to create appropriate * recipe, since VSI/VSI list is ignored with drop action... + * Also add rules to handle LLDP Tx and Rx packets. Tx LLDP packets + * need to be dropped so that VFs cannot send LLDP packets to reconfig + * DCB settings in the HW. Also, if the FW DCBX engine is not running + * then Rx LLDP packets need to be redirected up the stack. 
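The Tx and Rx rules built by ice_cfg_sw_lldp() both key on the LLDP ethertype: Tx-sourced 0x88CC frames are dropped (so VFs cannot push DCB changes), Rx ones are forwarded to the default VSI when the FW agent is not consuming them. A software equivalent of that classification, assuming an untagged Ethernet header (0x88CC is the standard ETH_P_LLDP value):

    #include <stdint.h>
    #include <stdio.h>

    #define ETH_P_LLDP 0x88CC       /* standard LLDP ethertype */

    /* The switch rules key on this ethertype; in software it is just
     * bytes 12-13 of the Ethernet header (no VLAN tag assumed here).
     */
    static int is_lldp_frame(const uint8_t *frame)
    {
            uint16_t ethertype = (uint16_t)(frame[12] << 8 | frame[13]);

            return ethertype == ETH_P_LLDP;
    }

    int main(void)
    {
            uint8_t frame[64] = { 0 };

            frame[12] = 0x88;
            frame[13] = 0xCC;
            printf("LLDP: %s\n", is_lldp_frame(frame) ? "yes" : "no");
            return 0;
    }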
*/ - if (vsi->type == ICE_VSI_PF) + if (vsi->type == ICE_VSI_PF) { ice_vsi_add_rem_eth_mac(vsi, true); + /* Tx LLDP packets */ + ice_cfg_sw_lldp(vsi, true, true); + + /* Rx LLDP packets */ + if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) + ice_cfg_sw_lldp(vsi, false, true); + } + return vsi; unroll_vector_base: /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); + ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; - /* reclaim HW interrupt back to the common pool */ - ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx); - pf->num_avail_hw_msix += vsi->num_q_vectors; unroll_alloc_q_vector: ice_vsi_free_q_vectors(vsi); unroll_vsi_init: @@ -2463,17 +2577,17 @@ unroll_get_qs: static void ice_vsi_release_msix(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - u16 vector = vsi->hw_base_vector; struct ice_hw *hw = &pf->hw; u32 txq = 0; u32 rxq = 0; int i, q; - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + for (i = 0; i < vsi->num_q_vectors; i++) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; + u16 reg_idx = q_vector->reg_idx; - wr32(hw, GLINT_ITR(ICE_IDX_ITR0, vector), 0); - wr32(hw, GLINT_ITR(ICE_IDX_ITR1, vector), 0); + wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0); + wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0); for (q = 0; q < q_vector->num_ring_tx; q++) { wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); txq++; @@ -2495,7 +2609,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) void ice_vsi_free_irq(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - int base = vsi->sw_base_vector; + int base = vsi->base_vector; if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { int i; @@ -2591,11 +2705,11 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) int count = 0; int i; - if (!res || index >= res->num_entries) + if (!res || index >= res->end) return -EINVAL; id |= ICE_RES_VALID_BIT; - for (i = index; i < res->num_entries && res->list[i] == id; i++) { + for (i = index; i < res->end && res->list[i] == id; i++) { res->list[i] = 0; count++; } @@ -2613,10 +2727,9 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) */ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) { - int start = res->search_hint; - int end = start; + int start = 0, end = 0; - if ((start + needed) > res->num_entries) + if (needed > res->end) return -ENOMEM; id |= ICE_RES_VALID_BIT; @@ -2625,7 +2738,7 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) /* skip already allocated entries */ if (res->list[end++] & ICE_RES_VALID_BIT) { start = end; - if ((start + needed) > res->num_entries) + if ((start + needed) > res->end) break; } @@ -2636,13 +2749,9 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) while (i != end) res->list[i++] = id; - if (end == res->num_entries) - end = 0; - - res->search_hint = end; return start; } - } while (1); + } while (end < res->end); return -ENOMEM; } @@ -2654,16 +2763,11 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) * @needed: size of the block needed * @id: identifier to track owner * - * Returns the base item index of the block, or -ENOMEM for error - * The search_hint trick and lack of advanced fit-finding only works - * because we're highly likely to have all the same sized requests. - * Linear search time and any fragmentation should be minimal. 
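With the search hint gone, the reworked ice_search_res() above is a plain bounded first-fit over the tracker list. A compile-standalone approximation with my own names and a simplified error convention (-1 instead of -ENOMEM):

#include <stdint.h>
#include <stdio.h>

#define RES_VALID_BIT 0x8000    /* stand-in for ICE_RES_VALID_BIT */

static int search_res(uint16_t *list, uint16_t end, uint16_t needed, uint16_t id)
{
        uint16_t start = 0;

        if (!needed || needed > end)
                return -1;

        id |= RES_VALID_BIT;

        while (start + needed <= end) {
                uint16_t i;

                /* scan one candidate window; stop at the first used entry */
                for (i = start; i < start + needed; i++)
                        if (list[i] & RES_VALID_BIT)
                                break;

                if (i == start + needed) {
                        /* hole is big enough: claim it for this owner */
                        for (i = start; i < start + needed; i++)
                                list[i] = id;
                        return start;
                }
                start = i + 1;  /* restart just past the used entry */
        }
        return -1;      /* no contiguous block of 'needed' free entries */
}

int main(void)
{
        uint16_t list[8] = { RES_VALID_BIT | 1, 0, 0, 0, 0, 0, 0, 0 };

        printf("allocated at index %d\n", search_res(list, 8, 3, 2));
        return 0;
}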
+ * Returns the base item index of the block, or negative for error */ int ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) { - int ret; - if (!res || !pf) return -EINVAL; @@ -2674,16 +2778,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) return -EINVAL; } - /* search based on search_hint */ - ret = ice_search_res(res, needed, id); - - if (ret < 0) { - /* previous search failed. Reset search hint and try again */ - res->search_hint = 0; - ret = ice_search_res(res, needed, id); - } - - return ret; + return ice_search_res(res, needed, id); } /** @@ -2692,7 +2787,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) */ void ice_vsi_dis_irq(struct ice_vsi *vsi) { - int base = vsi->sw_base_vector; + int base = vsi->base_vector; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; u32 val; @@ -2738,6 +2833,21 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) } /** + * ice_napi_del - Remove NAPI handler for the VSI + * @vsi: VSI for which NAPI handler is to be removed + */ +void ice_napi_del(struct ice_vsi *vsi) +{ + int v_idx; + + if (!vsi->netdev) + return; + + ice_for_each_q_vector(vsi, v_idx) + netif_napi_del(&vsi->q_vectors[v_idx]->napi); +} + +/** * ice_vsi_release - Delete a VSI and free its resources * @vsi: the VSI being removed * @@ -2745,60 +2855,61 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) */ int ice_vsi_release(struct ice_vsi *vsi) { - struct ice_vf *vf = NULL; struct ice_pf *pf; if (!vsi->back) return -ENODEV; pf = vsi->back; - if (vsi->type == ICE_VSI_VF) - vf = &pf->vf[vsi->vf_id]; - /* do not unregister and free netdevs while driver is in the reset - * recovery pending state. Since reset/rebuild happens through PF - * service task workqueue, its not a good idea to unregister netdev - * that is associated to the PF that is running the work queue items - * currently. This is done to avoid check_flush_dependency() warning - * on this wq + /* do not unregister while driver is in the reset recovery pending + * state. Since reset/rebuild happens through PF service task workqueue, + * it's not a good idea to unregister netdev that is associated to the + * PF that is running the work queue items currently. This is done to + * avoid check_flush_dependency() warning on this wq */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { - ice_napi_del(vsi); + if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); /* Disable VSI and free resources */ - ice_vsi_dis_irq(vsi); + if (vsi->type != ICE_VSI_LB) + ice_vsi_dis_irq(vsi); ice_vsi_close(vsi); - /* reclaim interrupt vectors back to PF */ + /* SR-IOV determines needed MSIX resources all at once instead of per + * VSI since when VFs are spawned we know how many VFs there are and how + * many interrupts each VF needs. SR-IOV MSIX resources are also + * cleared in the same manner. 
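For reference, the doorbell value ice_trigger_sw_intr() writes above is just an OR of an ITR index and two control bits. A sketch with representative shift/mask values; the real GLINT_DYN_CTL bit layout lives in the hardware register headers, so treat these constants as placeholders:

#include <stdint.h>
#include <stdio.h>

#define ITR_NONE                3               /* "no ITR interval" index */
#define DYN_CTL_INTENA          (1u << 0)       /* keep the vector enabled */
#define DYN_CTL_SWINT_TRIG      (1u << 2)       /* fire a software interrupt */
#define DYN_CTL_ITR_INDX_S      3

static uint32_t sw_intr_doorbell(void)
{
        return (ITR_NONE << DYN_CTL_ITR_INDX_S) |
               DYN_CTL_SWINT_TRIG |
               DYN_CTL_INTENA;
}

int main(void)
{
        printf("GLINT_DYN_CTL value: 0x%08x\n", sw_intr_doorbell());
        return 0;
}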
+ */ if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); + ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; - /* reclaim HW interrupts back to the common pool */ - ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx); - pf->num_avail_hw_msix += vsi->num_q_vectors; - } else if (test_bit(ICE_VF_STATE_CFG_INTR, vf->vf_states)) { - /* Reclaim VF resources back only while freeing all VFs or - * vector reassignment is requested - */ - ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx, - vsi->idx); - pf->num_avail_hw_msix += pf->num_vf_msix; } - if (vsi->type == ICE_VSI_PF) + if (vsi->type == ICE_VSI_PF) { ice_vsi_add_rem_eth_mac(vsi, false); + ice_cfg_sw_lldp(vsi, true, false); + /* The Rx rule will only exist to remove if the LLDP FW + * engine is currently stopped + */ + if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) + ice_cfg_sw_lldp(vsi, false, false); + } ice_remove_vsi_fltr(&pf->hw, vsi->idx); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); + + /* make sure unregister_netdev() was called by checking __ICE_DOWN */ + if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + ice_vsi_clear_rings(vsi); ice_vsi_put_qs(vsi); @@ -2825,6 +2936,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_vf *vf = NULL; + enum ice_status status; struct ice_pf *pf; int ret, i; @@ -2838,24 +2950,17 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); + /* SR-IOV determines needed MSIX resources all at once instead of per + * VSI since when VFs are spawned we know how many VFs there are and how + * many interrupts each VF needs. SR-IOV MSIX resources are also + * cleared in the same manner. 
+ */ if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); + ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; - vsi->sw_base_vector = 0; - /* reclaim HW interrupts back to the common pool */ - ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, - vsi->idx); - pf->num_avail_hw_msix += vsi->num_q_vectors; - } else { - /* Reclaim VF resources back to the common pool for reset and - * and rebuild, with vector reassignment - */ - ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx, - vsi->idx); - pf->num_avail_hw_msix += pf->num_vf_msix; + vsi->base_vector = 0; } - vsi->hw_base_vector = 0; ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); @@ -2881,10 +2986,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) if (ret) goto err_rings; - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto err_vectors; - ret = ice_vsi_set_q_vectors_reg_idx(vsi); if (ret) goto err_vectors; @@ -2929,12 +3030,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) for (i = 0; i < vsi->tc_cfg.numtc; i++) max_txqs[i] = pf->num_lan_tx; - ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); - if (ret) { + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); + if (status) { dev_err(&pf->pdev->dev, "VSI %d failed lan queue config, error %d\n", - vsi->vsi_num, ret); + vsi->vsi_num, status); goto err_vectors; } return 0; @@ -2956,7 +3057,7 @@ err_vsi: /** * ice_is_reset_in_progress - check for a reset in progress - * @state: pf state field + * @state: PF state field */ bool ice_is_reset_in_progress(unsigned long *state) { diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index a91d3553cc89..6e43ef03bfc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -19,6 +19,14 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); void ice_vsi_cfg_msix(struct ice_vsi *vsi); +#ifdef CONFIG_PCI_IOV +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx); + +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); +#endif /* CONFIG_PCI_IOV */ + int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid); int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid); @@ -37,6 +45,8 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); +void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); + void ice_vsi_delete(struct ice_vsi *vsi); int ice_vsi_clear(struct ice_vsi *vsi); @@ -49,6 +59,8 @@ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, enum ice_vsi_type type, u16 vf_id); +void ice_napi_del(struct ice_vsi *vsi); + int ice_vsi_release(struct ice_vsi *vsi); void ice_vsi_close(struct ice_vsi *vsi); @@ -64,6 +76,8 @@ bool ice_is_reset_in_progress(unsigned long *state); void ice_vsi_free_q_vectors(struct ice_vsi *vsi); +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); + void ice_vsi_put_qs(struct ice_vsi *vsi); #ifdef CONFIG_DCB diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 7843abf4d44d..28ec0d57941d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -61,9 +61,10 @@ static u32 ice_get_tx_pending(struct ice_ring *ring) static void 
ice_check_for_hang_subtask(struct ice_pf *pf) { struct ice_vsi *vsi = NULL; + struct ice_hw *hw; unsigned int i; - u32 v, v_idx; int packets; + u32 v; ice_for_each_vsi(pf, v) if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) { @@ -77,12 +78,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) if (!(vsi->netdev && netif_carrier_ok(vsi->netdev))) return; + hw = &vsi->back->hw; + for (i = 0; i < vsi->num_txq; i++) { struct ice_ring *tx_ring = vsi->tx_rings[i]; if (tx_ring && tx_ring->desc) { - int itr = ICE_ITR_NONE; - /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. @@ -93,12 +94,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) packets = tx_ring->stats.pkts & INT_MAX; if (tx_ring->tx_stats.prev_pkt == packets) { /* Trigger sw interrupt to revive the queue */ - v_idx = tx_ring->q_vector->v_idx; - wr32(&vsi->back->hw, - GLINT_DYN_CTL(vsi->hw_base_vector + v_idx), - (itr << GLINT_DYN_CTL_ITR_INDX_S) | - GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_INTENA_MSK_M); + ice_trigger_sw_intr(hw, tx_ring->q_vector); continue; } @@ -113,6 +109,67 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) } /** + * ice_init_mac_fltr - Set initial MAC filters + * @pf: board private structure + * + * Set initial set of MAC filters for PF VSI; configure filters for permanent + * address and broadcast address. If an error is encountered, netdevice will be + * unregistered. + */ +static int ice_init_mac_fltr(struct ice_pf *pf) +{ + LIST_HEAD(tmp_add_list); + u8 broadcast[ETH_ALEN]; + struct ice_vsi *vsi; + int status; + + vsi = ice_find_vsi_by_type(pf, ICE_VSI_PF); + if (!vsi) + return -EINVAL; + + /* To add a MAC filter, first add the MAC to a list and then + * pass the list to ice_add_mac. + */ + + /* Add a unicast MAC filter so the VSI can get its packets */ + status = ice_add_mac_to_list(vsi, &tmp_add_list, + vsi->port_info->mac.perm_addr); + if (status) + goto unregister; + + /* VSI needs to receive broadcast traffic, so add the broadcast + * MAC address to the list as well. + */ + eth_broadcast_addr(broadcast); + status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); + if (status) + goto free_mac_list; + + /* Program MAC filters for entries in tmp_add_list */ + status = ice_add_mac(&pf->hw, &tmp_add_list); + if (status) + status = -ENOMEM; + +free_mac_list: + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + +unregister: + /* We aren't useful with no MAC filters, so unregister if we + * had an error + */ + if (status && vsi->netdev->reg_state == NETREG_REGISTERED) { + dev_err(&pf->pdev->dev, + "Could not add MAC filters error %d. 
Unregistering device\n", + status); + unregister_netdev(vsi->netdev); + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + + return status; +} + +/** * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced * @netdev: the net device on which the sync is happening * @addr: MAC address to sync @@ -567,7 +624,11 @@ static void ice_reset_subtask(struct ice_pf *pf) */ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) { + struct ice_aqc_get_phy_caps_data *caps; + enum ice_status status; + const char *fec_req; const char *speed; + const char *fec; const char *fc; if (!vsi) @@ -584,6 +645,12 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_100GB: + speed = "100 G"; + break; + case ICE_AQ_LINK_SPEED_50GB: + speed = "50 G"; + break; case ICE_AQ_LINK_SPEED_40GB: speed = "40 G"; break; @@ -615,13 +682,13 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) switch (vsi->port_info->fc.current_mode) { case ICE_FC_FULL: - fc = "RX/TX"; + fc = "Rx/Tx"; break; case ICE_FC_TX_PAUSE: - fc = "TX"; + fc = "Tx"; break; case ICE_FC_RX_PAUSE: - fc = "RX"; + fc = "Rx"; break; case ICE_FC_NONE: fc = "None"; @@ -631,8 +698,47 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) break; } - netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n", - speed, fc); + /* Get FEC mode based on negotiated link info */ + switch (vsi->port_info->phy.link_info.fec_info) { + case ICE_AQ_LINK_25G_RS_528_FEC_EN: + /* fall through */ + case ICE_AQ_LINK_25G_RS_544_FEC_EN: + fec = "RS-FEC"; + break; + case ICE_AQ_LINK_25G_KR_FEC_EN: + fec = "FC-FEC/BASE-R"; + break; + default: + fec = "NONE"; + break; + } + + /* Get FEC mode requested based on PHY caps last SW configuration */ + caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + if (!caps) { + fec_req = "Unknown"; + goto done; + } + + status = ice_aq_get_phy_caps(vsi->port_info, false, + ICE_AQC_REPORT_SW_CFG, caps, NULL); + if (status) + netdev_info(vsi->netdev, "Get phy capability failed.\n"); + + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ) + fec_req = "RS-FEC"; + else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ) + fec_req = "FC-FEC/BASE-R"; + else + fec_req = "NONE"; + + devm_kfree(&vsi->back->pdev->dev, caps); + +done: + netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Flow Control: %s\n", + speed, fec_req, fec, fc); } /** @@ -664,7 +770,7 @@ static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up) /** * ice_link_event - process the link event - * @pf: pf that the link event is associated with + * @pf: PF that the link event is associated with * @pi: port_info for the port that the link event is associated with * @link_up: true if the physical link is up and false if it is down * @link_speed: current link speed received from the link event @@ -774,7 +880,7 @@ static int ice_init_link_events(struct ice_port_info *pi) /** * ice_handle_link_event - handle link event via ARQ - * @pf: pf that the link event is associated with + * @pf: PF that the link event is associated with * @event: event structure containing link status info */ static int @@ -1161,16 +1267,16 @@ static void ice_handle_mdd_event(struct ice_pf *pf) } } - /* see if one of the VFs needs to be reset */ - for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) { + /* check to 
see if one of the VFs caused the MDD */ + for (i = 0; i < pf->num_alloc_vfs; i++) { struct ice_vf *vf = &pf->vf[i]; - mdd_detected = false; + bool vf_mdd_detected = false; reg = rd32(hw, VP_MDET_TX_PQM(i)); if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); } @@ -1178,7 +1284,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, VP_MDET_TX_TCLAN(i)); if (reg & VP_MDET_TX_TCLAN_VALID_M) { wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); } @@ -1186,7 +1292,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, VP_MDET_TX_TDPU(i)); if (reg & VP_MDET_TX_TDPU_VALID_M) { wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); } @@ -1194,19 +1300,18 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, VP_MDET_RX(i)); if (reg & VP_MDET_RX_VALID_M) { wr32(hw, VP_MDET_RX(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", i); } - if (mdd_detected) { + if (vf_mdd_detected) { vf->num_mdd_events++; - dev_info(&pf->pdev->dev, - "Use PF Control I/F to re-enable the VF\n"); - set_bit(ICE_VF_STATE_DIS, vf->vf_states); + if (vf->num_mdd_events > 1) + dev_info(&pf->pdev->dev, "VF %d has had %llu MDD events since last boot\n", + i, vf->num_mdd_events); } } - } /** @@ -1327,7 +1432,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) { int q_vectors = vsi->num_q_vectors; struct ice_pf *pf = vsi->back; - int base = vsi->sw_base_vector; + int base = vsi->base_vector; int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; @@ -1408,7 +1513,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, val); /* SW_ITR_IDX = 0, but don't change INTENA */ - wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); } @@ -1430,6 +1535,11 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); + if (oicr & PFINT_OICR_SWINT_M) { + ena_mask &= ~PFINT_OICR_SWINT_M; + pf->sw_int_count++; + } + if (oicr & PFINT_OICR_MAL_DETECT_M) { ena_mask &= ~PFINT_OICR_MAL_DETECT_M; set_bit(__ICE_MDD_EVENT_PENDING, pf->state); @@ -1556,15 +1666,13 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) ice_flush(hw); if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { - synchronize_irq(pf->msix_entries[pf->sw_oicr_idx].vector); + synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); devm_free_irq(&pf->pdev->dev, - pf->msix_entries[pf->sw_oicr_idx].vector, pf); + pf->msix_entries[pf->oicr_idx].vector, pf); } pf->num_avail_sw_msix += 1; - ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID); - pf->num_avail_hw_msix += 1; - ice_free_res(pf->hw_irq_tracker, pf->hw_oicr_idx, ICE_RES_MISC_VEC_ID); + ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); } /** @@ -1618,43 +1726,31 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) if (ice_is_reset_in_progress(pf->state)) goto skip_req_irq; - /* reserve one vector in sw_irq_tracker for misc interrupts */ - oicr_idx = ice_get_res(pf, pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + /* 
reserve one vector in irq_tracker for misc interrupts */ + oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); if (oicr_idx < 0) return oicr_idx; pf->num_avail_sw_msix -= 1; - pf->sw_oicr_idx = oicr_idx; - - /* reserve one vector in hw_irq_tracker for misc interrupts */ - oicr_idx = ice_get_res(pf, pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); - if (oicr_idx < 0) { - ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); - pf->num_avail_sw_msix += 1; - return oicr_idx; - } - pf->num_avail_hw_msix -= 1; - pf->hw_oicr_idx = oicr_idx; + pf->oicr_idx = oicr_idx; err = devm_request_irq(&pf->pdev->dev, - pf->msix_entries[pf->sw_oicr_idx].vector, + pf->msix_entries[pf->oicr_idx].vector, ice_misc_intr, 0, pf->int_name, pf); if (err) { dev_err(&pf->pdev->dev, "devm_request_irq for %s failed: %d\n", pf->int_name, err); - ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); pf->num_avail_sw_msix += 1; - ice_free_res(pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); - pf->num_avail_hw_msix += 1; return err; } skip_req_irq: ice_ena_misc_vector(pf); - ice_ena_ctrlq_interrupts(hw, pf->hw_oicr_idx); - wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx), + ice_ena_ctrlq_interrupts(hw, pf->oicr_idx); + wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); ice_flush(hw); @@ -1664,21 +1760,6 @@ skip_req_irq: } /** - * ice_napi_del - Remove NAPI handler for the VSI - * @vsi: VSI for which NAPI handler is to be removed - */ -void ice_napi_del(struct ice_vsi *vsi) -{ - int v_idx; - - if (!vsi->netdev) - return; - - ice_for_each_q_vector(vsi, v_idx) - netif_napi_del(&vsi->q_vectors[v_idx]->napi); -} - -/** * ice_napi_add - register NAPI handler for the VSI * @vsi: VSI for which NAPI handler is to be registered * @@ -1803,8 +1884,8 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) * @pf: board private structure * @pi: pointer to the port_info instance * - * Returns pointer to the successfully allocated VSI sw struct on success, - * otherwise returns NULL on failure. + * Returns pointer to the successfully allocated VSI software struct + * on success, otherwise returns NULL on failure. */ static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) @@ -1813,6 +1894,20 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) } /** + * ice_lb_vsi_setup - Set up a loopback VSI + * @pf: board private structure + * @pi: pointer to the port_info instance + * + * Returns pointer to the successfully allocated VSI software struct + * on success, otherwise returns NULL on failure. + */ +struct ice_vsi * +ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) +{ + return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID); +} + +/** * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload * @netdev: network interface to be adjusted * @proto: unused protocol @@ -1900,8 +1995,6 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, */ static int ice_setup_pf_sw(struct ice_pf *pf) { - LIST_HEAD(tmp_add_list); - u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; int status = 0; @@ -1926,38 +2019,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf) */ ice_napi_add(vsi); - /* To add a MAC filter, first add the MAC to a list and then - * pass the list to ice_add_mac. 
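The new ice_init_mac_fltr() keeps the existing pattern: stage every address on a temporary list, program the whole list in one call, then free it. A stripped-down sketch with a fixed array standing in for the kernel list:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

struct mac_entry { uint8_t addr[ETH_ALEN]; };

static void eth_broadcast(uint8_t *addr)
{
        memset(addr, 0xff, ETH_ALEN);   /* ff:ff:ff:ff:ff:ff */
}

int main(void)
{
        uint8_t perm[ETH_ALEN] = { 0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc };
        struct mac_entry list[2];
        size_t i, n = 0;

        /* stage the permanent unicast address and broadcast first ... */
        memcpy(list[n++].addr, perm, ETH_ALEN);
        eth_broadcast(list[n++].addr);

        /* ... then "program" them in a single pass, as ice_add_mac() does */
        for (i = 0; i < n; i++)
                printf("add filter %02x:%02x:%02x:%02x:%02x:%02x\n",
                       list[i].addr[0], list[i].addr[1], list[i].addr[2],
                       list[i].addr[3], list[i].addr[4], list[i].addr[5]);
        return 0;
}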
- */ - - /* Add a unicast MAC filter so the VSI can get its packets */ - status = ice_add_mac_to_list(vsi, &tmp_add_list, - vsi->port_info->mac.perm_addr); + status = ice_init_mac_fltr(pf); if (status) goto unroll_napi_add; - /* VSI needs to receive broadcast traffic, so add the broadcast - * MAC address to the list as well. - */ - eth_broadcast_addr(broadcast); - status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); - if (status) - goto free_mac_list; - - /* program MAC filters for entries in tmp_add_list */ - status = ice_add_mac(&pf->hw, &tmp_add_list); - if (status) { - dev_err(&pf->pdev->dev, "Could not add MAC filters\n"); - status = -ENOMEM; - goto free_mac_list; - } - - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); return status; -free_mac_list: - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); - unroll_napi_add: if (vsi) { ice_napi_del(vsi); @@ -2149,14 +2216,9 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) ice_dis_msix(pf); - if (pf->sw_irq_tracker) { - devm_kfree(&pf->pdev->dev, pf->sw_irq_tracker); - pf->sw_irq_tracker = NULL; - } - - if (pf->hw_irq_tracker) { - devm_kfree(&pf->pdev->dev, pf->hw_irq_tracker); - pf->hw_irq_tracker = NULL; + if (pf->irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->irq_tracker); + pf->irq_tracker = NULL; } } @@ -2166,7 +2228,7 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) */ static int ice_init_interrupt_scheme(struct ice_pf *pf) { - int vectors = 0, hw_vectors = 0; + int vectors; if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) vectors = ice_ena_msix_range(pf); @@ -2177,31 +2239,18 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) return vectors; /* set up vector assignment tracking */ - pf->sw_irq_tracker = - devm_kzalloc(&pf->pdev->dev, sizeof(*pf->sw_irq_tracker) + + pf->irq_tracker = + devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) + (sizeof(u16) * vectors), GFP_KERNEL); - if (!pf->sw_irq_tracker) { + if (!pf->irq_tracker) { ice_dis_msix(pf); return -ENOMEM; } /* populate SW interrupts pool with number of OS granted IRQs. */ pf->num_avail_sw_msix = vectors; - pf->sw_irq_tracker->num_entries = vectors; - - /* set up HW vector assignment tracking */ - hw_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - pf->hw_irq_tracker = - devm_kzalloc(&pf->pdev->dev, sizeof(*pf->hw_irq_tracker) + - (sizeof(u16) * hw_vectors), GFP_KERNEL); - if (!pf->hw_irq_tracker) { - ice_clear_interrupt_scheme(pf); - return -ENOMEM; - } - - /* populate HW interrupts pool with number of HW supported irqs. 
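After this consolidation there is a single tracker, sized by the OS-granted vector count, whose end initially equals num_entries (SR-IOV may trim it later). A compile-standalone sketch of that shape, using a flexible array member the way the driver's devm_kzalloc() sizing does:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct res_tracker {
        uint16_t num_entries;
        uint16_t end;           /* allocations must stay below this index */
        uint16_t list[];        /* one owner id per vector */
};

static struct res_tracker *tracker_alloc(uint16_t vectors)
{
        struct res_tracker *t = calloc(1, sizeof(*t) + sizeof(uint16_t) * vectors);

        if (!t)
                return NULL;
        t->num_entries = vectors;
        t->end = t->num_entries;        /* whole range usable until trimmed */
        return t;
}

int main(void)
{
        struct res_tracker *t = tracker_alloc(64);

        if (t)
                printf("entries=%u end=%u\n", t->num_entries, t->end);
        free(t);
        return 0;
}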
*/ - pf->num_avail_hw_msix = hw_vectors; - pf->hw_irq_tracker->num_entries = hw_vectors; + pf->irq_tracker->num_entries = vectors; + pf->irq_tracker->end = pf->irq_tracker->num_entries; return 0; } @@ -2252,7 +2301,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (!pf) return -ENOMEM; - /* set up for high or low dma */ + /* set up for high or low DMA */ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (err) err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); @@ -2302,7 +2351,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_init_pf(pf); - err = ice_init_pf_dcb(pf); + err = ice_init_pf_dcb(pf, false); if (err) { clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); clear_bit(ICE_FLAG_DCB_ENA, pf->flags); @@ -2368,7 +2417,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) err = ice_setup_pf_sw(pf); if (err) { - dev_err(dev, "probe failed due to setup pf switch:%d\n", err); + dev_err(dev, "probe failed due to setup PF switch:%d\n", err); goto err_alloc_sw_unroll; } @@ -2625,7 +2674,7 @@ static int __init ice_module_init(void) status = pci_register_driver(&ice_driver); if (status) { - pr_err("failed to register pci driver, err %d\n", status); + pr_err("failed to register PCI driver, err %d\n", status); destroy_workqueue(ice_wq); } @@ -2725,21 +2774,21 @@ free_lists: ice_free_fltr_list(&pf->pdev->dev, &a_mac_list); if (err) { - netdev_err(netdev, "can't set mac %pM. filter update failed\n", + netdev_err(netdev, "can't set MAC %pM. filter update failed\n", mac); return err; } /* change the netdev's MAC address */ memcpy(netdev->dev_addr, mac, netdev->addr_len); - netdev_dbg(vsi->netdev, "updated mac address to %pM\n", + netdev_dbg(vsi->netdev, "updated MAC address to %pM\n", netdev->dev_addr); /* write new MAC address to the firmware */ flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; status = ice_aq_manage_mac_write(hw, mac, flags, NULL); if (status) { - netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n", + netdev_err(netdev, "can't set MAC %pM. 
write to firmware failed.\n", mac); } return 0; @@ -2876,6 +2925,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) (netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) ret = ice_vsi_manage_vlan_insertion(vsi); + if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) && + !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + ret = ice_cfg_vlan_pruning(vsi, true, false); + else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) && + (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + ret = ice_cfg_vlan_pruning(vsi, false, false); + return ret; } @@ -2901,7 +2957,7 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi) * * Return 0 on success and negative value on error */ -static int ice_vsi_cfg(struct ice_vsi *vsi) +int ice_vsi_cfg(struct ice_vsi *vsi) { int err; @@ -2933,7 +2989,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) if (!vsi->netdev) return; - ice_for_each_q_vector(vsi, q_idx) { + ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; if (q_vector->rx.ring || q_vector->tx.ring) @@ -3456,7 +3512,7 @@ int ice_down(struct ice_vsi *vsi) * * Return 0 on success, negative on failure */ -static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) +int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) { int i, err = 0; @@ -3482,7 +3538,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) * * Return 0 on success, negative on failure */ -static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) +int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) { int i, err = 0; @@ -3658,7 +3714,7 @@ static int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked) } /** - * ice_vsi_rebuild_all - rebuild all VSIs in pf + * ice_vsi_rebuild_all - rebuild all VSIs in PF * @pf: the PF */ static int ice_vsi_rebuild_all(struct ice_pf *pf) @@ -3728,7 +3784,7 @@ static int ice_vsi_replay_all(struct ice_pf *pf) /** * ice_rebuild - rebuild after reset - * @pf: pf to rebuild + * @pf: PF to rebuild */ static void ice_rebuild(struct ice_pf *pf) { @@ -3740,7 +3796,7 @@ static void ice_rebuild(struct ice_pf *pf) if (test_bit(__ICE_DOWN, pf->state)) goto clear_recovery; - dev_dbg(dev, "rebuilding pf\n"); + dev_dbg(dev, "rebuilding PF\n"); ret = ice_init_all_ctrlq(hw); if (ret) { @@ -3768,12 +3824,6 @@ static void ice_rebuild(struct ice_pf *pf) ice_dcb_rebuild(pf); - /* reset search_hint of irq_trackers to 0 since interrupts are - * reclaimed and could be allocated from beginning during VSI rebuild - */ - pf->sw_irq_tracker->search_hint = 0; - pf->hw_irq_tracker->search_hint = 0; - err = ice_vsi_rebuild_all(pf); if (err) { dev_err(dev, "ice_vsi_rebuild_all failed\n"); @@ -3857,16 +3907,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) u8 count = 0; if (new_mtu == netdev->mtu) { - netdev_warn(netdev, "mtu is already %u\n", netdev->mtu); + netdev_warn(netdev, "MTU is already %u\n", netdev->mtu); return 0; } if (new_mtu < netdev->min_mtu) { - netdev_err(netdev, "new mtu invalid. min_mtu is %d\n", + netdev_err(netdev, "new MTU invalid. min_mtu is %d\n", netdev->min_mtu); return -EINVAL; } else if (new_mtu > netdev->max_mtu) { - netdev_err(netdev, "new mtu invalid. max_mtu is %d\n", + netdev_err(netdev, "new MTU invalid. max_mtu is %d\n", netdev->min_mtu); return -EINVAL; } @@ -3882,7 +3932,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } while (count < 100); if (count == 100) { - netdev_err(netdev, "can't change mtu. Device is busy\n"); + netdev_err(netdev, "can't change MTU. 
Device is busy\n"); return -EBUSY; } @@ -3894,18 +3944,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) err = ice_down(vsi); if (err) { - netdev_err(netdev, "change mtu if_up err %d\n", err); + netdev_err(netdev, "change MTU if_up err %d\n", err); return err; } err = ice_up(vsi); if (err) { - netdev_err(netdev, "change mtu if_up err %d\n", err); + netdev_err(netdev, "change MTU if_up err %d\n", err); return err; } } - netdev_dbg(netdev, "changed mtu to %d\n", new_mtu); + netdev_info(netdev, "changed MTU to %d\n", new_mtu); return 0; } @@ -4241,7 +4291,7 @@ static void ice_tx_timeout(struct net_device *netdev) * * Returns 0 on success, negative value on failure */ -static int ice_open(struct net_device *netdev) +int ice_open(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -4278,7 +4328,7 @@ static int ice_open(struct net_device *netdev) * * Returns success only - not allowed to fail */ -static int ice_stop(struct net_device *netdev) +int ice_stop(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 62571d33d0d6..bcb431f1bd92 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -119,7 +119,7 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) status = ice_read_sr_aq(hw, offset, 1, data, true); if (!status) - *data = le16_to_cpu(*(__le16 *)data); + *data = le16_to_cpu(*(__force __le16 *)data); return status; } @@ -174,7 +174,7 @@ ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) } while (words_read < *words); for (i = 0; i < *words; i++) - data[i] = le16_to_cpu(((__le16 *)data)[i]); + data[i] = le16_to_cpu(((__force __le16 *)data)[i]); read_nvm_buf_aq_exit: *words = words_read; @@ -316,3 +316,34 @@ ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) return status; } + +/** + * ice_nvm_validate_checksum + * @hw: pointer to the HW struct + * + * Verify NVM PFA checksum validity (0x0706) + */ +enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw) +{ + struct ice_aqc_nvm_checksum *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) + return status; + + cmd = &desc.params.nvm_checksum; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum); + cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + ice_release_nvm(hw); + + if (!status) + if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT) + status = ICE_ERR_NVM_CHECKSUM; + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 8d49f83be7a5..2a232504379d 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -683,10 +683,10 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, u16 i, num_groups_added = 0; enum ice_status status = 0; struct ice_hw *hw = pi->hw; - u16 buf_size; + size_t buf_size; u32 teid; - buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1); + buf_size = struct_size(buf, generic, num_nodes - 1); buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); if (!buf) return ICE_ERR_NO_MEMORY; diff --git a/drivers/net/ethernet/intel/ice/ice_status.h 
b/drivers/net/ethernet/intel/ice/ice_status.h index 17afe6acb18a..c01597885629 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -26,6 +26,7 @@ enum ice_status { ICE_ERR_IN_USE = -16, ICE_ERR_MAX_LIMIT = -17, ICE_ERR_RESET_ONGOING = -18, + ICE_ERR_NVM_CHECKSUM = -51, ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, ICE_ERR_AQ_ERROR = -100, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 9f1f595ae7e6..8271fd651725 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -799,7 +799,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, daddr = f_info->l_data.ethertype_mac.mac_addr; /* fall-through */ case ICE_SW_LKUP_ETHERTYPE: - off = (__be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET); + off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET); *off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype); break; case ICE_SW_LKUP_MAC_VLAN: @@ -829,7 +829,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr); if (!(vlan_id > ICE_MAX_VLAN_ID)) { - off = (__be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET); + off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET); *off = cpu_to_be16(vlan_id); } @@ -1973,6 +1973,10 @@ ice_add_vlan(struct ice_hw *hw, struct list_head *v_list) * ice_add_eth_mac - Add ethertype and MAC based filter rule * @hw: pointer to the hardware structure * @em_list: list of ether type MAC filter, MAC is optional + * + * This function requires the caller to populate the entries in + * the filter list with the necessary fields (including flags to + * indicate Tx or Rx rules). */ enum ice_status ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list) @@ -1990,7 +1994,6 @@ ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list) l_type != ICE_SW_LKUP_ETHERTYPE) return ICE_ERR_PARAM; - em_list_itr->fltr_info.flag = ICE_FLTR_TX; em_list_itr->status = ice_add_rule_internal(hw, l_type, em_list_itr); if (em_list_itr->status) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 732b0b9b2e15..cb123fbe30be 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -8,9 +8,11 @@ #define ICE_SW_CFG_MAX_BUF_LEN 2048 #define ICE_DFLT_VSI_INVAL 0xff +#define ICE_FLTR_RX BIT(0) +#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF -#define ICE_INVAL_Q_HANDLE 0xFFFF /* VSI queue context structure */ struct ice_q_ctx { @@ -69,9 +71,6 @@ struct ice_fltr_info { /* rule ID returned by firmware once filter rule is created */ u16 fltr_rule_id; u16 flag; -#define ICE_FLTR_RX BIT(0) -#define ICE_FLTR_TX BIT(1) -#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */ u16 src; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 2364eaf33d23..3c83230434b6 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -55,7 +55,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) if (!tx_ring->tx_buf) return; - /* Free all the Tx ring sk_bufss */ + /* Free all the Tx ring sk_buffs */ for (i = 0; i < tx_ring->count; i++) ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]); @@ -1101,7 +1101,7 
@@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic * @port_info: port_info structure containing the current link speed * @avg_pkt_size: average size of Tx or Rx packets based on clean routine - * @itr: itr value to update + * @itr: ITR value to update * * Calculate how big of an increment should be applied to the ITR value passed * in based on wmem_default, SKB overhead, Ethernet overhead, and the current @@ -1316,7 +1316,7 @@ clear_counts: */ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) { - /* The itr value is reported in microseconds, and the register value is + /* The ITR value is reported in microseconds, and the register value is * recorded in 2 microsecond units. For this reason we only need to * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this * granularity as a shift instead of division. The mask makes sure the @@ -1645,7 +1645,7 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, return; dma_error: - /* clear dma mappings for failed tx_buf map */ + /* clear DMA mappings for failed tx_buf map */ for (;;) { tx_buf = &tx_ring->tx_buf[i]; ice_unmap_and_free_tx_buf(tx_ring, tx_buf); @@ -1874,10 +1874,10 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) cd_mss = skb_shinfo(skb)->gso_size; /* record cdesc_qw1 with TSO parameters */ - off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX | - (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | - (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | - (cd_mss << ICE_TXD_CTX_QW1_MSS_S); + off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | + (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | + (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | + (cd_mss << ICE_TXD_CTX_QW1_MSS_S)); first->tx_flags |= ICE_TX_FLAGS_TSO; return 1; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 66e05032ee56..ec76aba347b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -58,19 +58,19 @@ struct ice_tx_buf { unsigned int bytecount; unsigned short gso_segs; u32 tx_flags; - DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); + DEFINE_DMA_UNMAP_ADDR(dma); }; struct ice_tx_offload_params { - u8 header_len; + u64 cd_qw1; + struct ice_ring *tx_ring; u32 td_cmd; u32 td_offset; u32 td_l2tag1; - u16 cd_l2tag2; u32 cd_tunnel_params; - u64 cd_qw1; - struct ice_ring *tx_ring; + u16 cd_l2tag2; + u8 header_len; }; struct ice_rx_buf { @@ -150,6 +150,7 @@ enum ice_rx_dtype { /* descriptor ring, associated with a VSI */ struct ice_ring { + /* CL1 - 1st cacheline starts here */ struct ice_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ @@ -161,11 +162,11 @@ struct ice_ring { struct ice_tx_buf *tx_buf; struct ice_rx_buf *rx_buf; }; + /* CL2 - 2nd cacheline starts here */ u16 q_index; /* Queue number of ring */ - u32 txq_teid; /* Added Tx queue TEID */ -#ifdef CONFIG_DCB - u8 dcb_tc; /* Traffic class of ring */ -#endif /* CONFIG_DCB */ + u16 q_handle; /* Queue handle per TC */ + + u8 ring_active:1; /* is ring online or not */ u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -173,8 +174,7 @@ struct ice_ring { /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - - u8 ring_active; /* is ring online or not */ + u16 next_to_alloc; /* stats structs */ struct ice_q_stats stats; @@ -184,10 +184,17 @@ struct ice_ring { 
struct ice_rxq_stats rx_stats; }; - unsigned int size; /* length of descriptor ring in bytes */ - dma_addr_t dma; /* physical address of ring */ struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; + /* CLX - the below items are only accessed infrequently and should be + * in their own cache line if possible + */ + dma_addr_t dma; /* physical address of ring */ + unsigned int size; /* length of descriptor ring in bytes */ + u32 txq_teid; /* Added Tx queue TEID */ + u16 rx_buf_len; +#ifdef CONFIG_DCB + u8 dcb_tc; /* Traffic class of ring */ +#endif /* CONFIG_DCB */ } ____cacheline_internodealigned_in_smp; struct ice_ring_container { diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index a862af4cbf78..24bbef8bbe69 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -23,6 +23,7 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) +#define ICE_DBG_FW_LOG BIT_ULL(3) #define ICE_DBG_LINK BIT_ULL(4) #define ICE_DBG_PHY BIT_ULL(5) #define ICE_DBG_QCTX BIT_ULL(6) @@ -61,6 +62,13 @@ enum ice_fc_mode { ICE_FC_DFLT }; +enum ice_fec_mode { + ICE_FEC_NONE = 0, + ICE_FEC_RS, + ICE_FEC_BASER, + ICE_FEC_AUTO +}; + enum ice_set_fc_aq_failures { ICE_SET_FC_AQ_FAIL_NONE = 0, ICE_SET_FC_AQ_FAIL_GET, @@ -86,12 +94,14 @@ enum ice_media_type { enum ice_vsi_type { ICE_VSI_PF = 0, ICE_VSI_VF, + ICE_VSI_LB = 6, }; struct ice_link_status { /* Refer to ice_aq_phy_type for bits definition */ u64 phy_type_low; u64 phy_type_high; + u8 topo_media_conflict; u16 max_frame_size; u16 link_speed; u16 req_speeds; @@ -99,6 +109,7 @@ struct ice_link_status { u8 link_info; u8 an_info; u8 ext_info; + u8 fec_info; u8 pacing; /* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of * ice_aqc_get_phy_caps structure @@ -423,7 +434,7 @@ struct ice_hw { struct ice_fw_log_cfg fw_log; /* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL - * register. Used for determining the itr/intrl granularity during + * register. Used for determining the ITR/intrl granularity during * initialization. */ #define ICE_MAX_AGG_BW_200G 0x0 diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index a805cbdd69be..5d24b539648f 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -103,7 +103,7 @@ ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe, u16 link_speed; if (link_up) - link_speed = ICE_AQ_LINK_SPEED_40GB; + link_speed = ICE_AQ_LINK_SPEED_100GB; else link_speed = ICE_AQ_LINK_SPEED_UNKNOWN; @@ -141,32 +141,20 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf) } /** - * ice_get_vf_vector - get VF interrupt vector register offset - * @vf_msix: number of MSIx vector per VF on a PF - * @vf_id: VF identifier - * @i: index of MSIx vector - */ -static u32 ice_get_vf_vector(int vf_msix, int vf_id, int i) -{ - return ((i == 0) ? VFINT_DYN_CTLN(vf_id) : - VFINT_DYN_CTLN(((vf_msix - 1) * (vf_id)) + (i - 1))); -} - -/** * ice_free_vf_res - Free a VF's resources * @vf: pointer to the VF info */ static void ice_free_vf_res(struct ice_vf *vf) { struct ice_pf *pf = vf->pf; - int i, pf_vf_msix; + int i, last_vector_idx; /* First, disable VF's configuration API to prevent OS from * accessing the VF's VSI after it's freed or invalidated. 
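The ice_ring reshuffle above is a classic hot/cold split: fast-path fields in the first cacheline, rarely-touched bookkeeping pushed to the tail. A toy illustration (a cut-down stand-in struct, not the driver's) that checks where the hot region ends:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ring {
        /* CL1: fields touched on every descriptor clean */
        void *desc;
        void *dev;
        void *netdev;
        uint16_t q_index;
        uint16_t count;
        uint16_t next_to_use;
        uint16_t next_to_clean;
        /* CLX: infrequently accessed bookkeeping */
        uint64_t dma;
        unsigned int size;
        uint32_t txq_teid;
};

int main(void)
{
        printf("hot fields end at offset %zu (x86 cacheline is 64 bytes)\n",
               offsetof(struct ring, dma));
        return 0;
}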
*/ clear_bit(ICE_VF_STATE_INIT, vf->vf_states); - /* free vsi & disconnect it from the parent uplink */ + /* free VSI and disconnect it from the parent uplink */ if (vf->lan_vsi_idx) { ice_vsi_release(pf->vsi[vf->lan_vsi_idx]); vf->lan_vsi_idx = 0; @@ -174,13 +162,10 @@ static void ice_free_vf_res(struct ice_vf *vf) vf->num_mac = 0; } - pf_vf_msix = pf->num_vf_msix; + last_vector_idx = vf->first_vector_idx + pf->num_vf_msix - 1; /* Disable interrupts so that VF starts in a known state */ - for (i = 0; i < pf_vf_msix; i++) { - u32 reg_idx; - - reg_idx = ice_get_vf_vector(pf_vf_msix, vf->vf_id, i); - wr32(&pf->hw, reg_idx, VFINT_DYN_CTLN_CLEARPBA_M); + for (i = vf->first_vector_idx; i <= last_vector_idx; i++) { + wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M); ice_flush(&pf->hw); } /* reset some of the state variables keeping track of the resources */ @@ -205,8 +190,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) wr32(hw, VPINT_ALLOC(vf->vf_id), 0); wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); - first = vf->first_vector_idx + - hw->func_caps.common_cap.msix_vector_first_id; + first = vf->first_vector_idx; last = first + pf->num_vf_msix - 1; for (v = first; v <= last; v++) { u32 reg; @@ -232,6 +216,42 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) } /** + * ice_sriov_free_msix_res - Reset/free any used MSIX resources + * @pf: pointer to the PF structure + * + * If MSIX entries from the pf->irq_tracker were needed then we need to + * reset the irq_tracker->end and give back the entries we needed to + * num_avail_sw_msix. + * + * If no MSIX entries were taken from the pf->irq_tracker then just clear + * the pf->sriov_base_vector. + * + * Returns 0 on success, and -EINVAL on error. + */ +static int ice_sriov_free_msix_res(struct ice_pf *pf) +{ + struct ice_res_tracker *res; + + if (!pf) + return -EINVAL; + + res = pf->irq_tracker; + if (!res) + return -EINVAL; + + /* give back irq_tracker resources used */ + if (pf->sriov_base_vector < res->num_entries) { + res->end = res->num_entries; + pf->num_avail_sw_msix += + res->num_entries - pf->sriov_base_vector; + } + + pf->sriov_base_vector = 0; + + return 0; +} + +/** * ice_free_vfs - Free all VFs * @pf: pointer to the PF structure */ @@ -246,15 +266,6 @@ void ice_free_vfs(struct ice_pf *pf) while (test_and_set_bit(__ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); - /* Disable IOV before freeing resources. This lets any VF drivers - * running in the host get themselves cleaned up before we yank - * the carpet out from underneath their feet. - */ - if (!pci_vfs_assigned(pf->pdev)) - pci_disable_sriov(pf->pdev); - else - dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); - /* Avoid wait time by stopping all VFs at the same time */ for (i = 0; i < pf->num_alloc_vfs; i++) { struct ice_vsi *vsi; @@ -270,6 +281,15 @@ void ice_free_vfs(struct ice_pf *pf) clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states); } + /* Disable IOV before freeing resources. This lets any VF drivers + * running in the host get themselves cleaned up before we yank + * the carpet out from underneath their feet. 
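ice_sriov_free_msix_res() above only has to undo two things: restore the tracker's end and return the trimmed entries to the available count. The same logic in a standalone sketch (plain ints, -1 for error):

#include <stdint.h>
#include <stdio.h>

struct tracker { uint16_t num_entries; uint16_t end; };

static int sriov_free_msix(struct tracker *t, uint16_t *avail_msix,
                           uint16_t *sriov_base)
{
        if (!t)
                return -1;

        if (*sriov_base < t->num_entries) {
                t->end = t->num_entries;        /* re-open the trimmed tail */
                *avail_msix += t->num_entries - *sriov_base;
        }
        *sriov_base = 0;
        return 0;
}

int main(void)
{
        struct tracker t = { .num_entries = 64, .end = 48 };
        uint16_t avail = 32, base = 48;

        sriov_free_msix(&t, &avail, &base);
        printf("end=%u avail=%u base=%u\n", t.end, avail, base); /* 64 48 0 */
        return 0;
}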
+ */ + if (!pci_vfs_assigned(pf->pdev)) + pci_disable_sriov(pf->pdev); + else + dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); + tmp = pf->num_alloc_vfs; pf->num_vf_qps = 0; pf->num_alloc_vfs = 0; @@ -288,6 +308,10 @@ void ice_free_vfs(struct ice_pf *pf) } } + if (ice_sriov_free_msix_res(pf)) + dev_err(&pf->pdev->dev, + "Failed to free MSIX resources used by SR-IOV\n"); + devm_kfree(&pf->pdev->dev, pf->vf); pf->vf = NULL; @@ -457,6 +481,22 @@ ice_vf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, u16 vf_id) } /** + * ice_calc_vf_first_vector_idx - Calculate absolute MSIX vector index in HW + * @pf: pointer to PF structure + * @vf: pointer to VF that the first MSIX vector index is being calculated for + * + * This returns the first MSIX vector index in HW that is used by this VF and + * this will always be the OICR index in the AVF driver so any functionality + * using vf->first_vector_idx for queue configuration will have to increment by + * 1 to avoid meddling with the OICR index. + */ +static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) +{ + return pf->hw.func_caps.common_cap.msix_vector_first_id + + pf->sriov_base_vector + vf->vf_id * pf->num_vf_msix; +} + +/** * ice_alloc_vsi_res - Setup VF VSI and its resources * @vf: pointer to the VF structure * @@ -470,8 +510,10 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) struct ice_vsi *vsi; int status = 0; - vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id); + /* first vector index is the VFs OICR index */ + vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf); + vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id); if (!vsi) { dev_err(&pf->pdev->dev, "Failed to create VF VSI\n"); return -ENOMEM; @@ -480,14 +522,6 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) vf->lan_vsi_idx = vsi->idx; vf->lan_vsi_num = vsi->vsi_num; - /* first vector index is the VFs OICR index */ - vf->first_vector_idx = vsi->hw_base_vector; - /* Since hw_base_vector holds the vector where data queue interrupts - * starts, increment by 1 since VFs allocated vectors include OICR intr - * as well. - */ - vsi->hw_base_vector += 1; - /* Check if port VLAN exist before, and restore it accordingly */ if (vf->port_vlan_id) { ice_vsi_manage_pvid(vsi, vf->port_vlan_id, true); @@ -580,8 +614,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) hw = &pf->hw; vsi = pf->vsi[vf->lan_vsi_idx]; - first = vf->first_vector_idx + - hw->func_caps.common_cap.msix_vector_first_id; + first = vf->first_vector_idx; last = (first + pf->num_vf_msix) - 1; abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; @@ -687,6 +720,97 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res) } /** + * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space + * @vf: VF to calculate the register index for + * @q_vector: a q_vector associated to the VF + */ +int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) +{ + struct ice_pf *pf; + + if (!vf || !q_vector) + return -EINVAL; + + pf = vf->pf; + + /* always add one to account for the OICR being the first MSIX */ + return pf->sriov_base_vector + pf->num_vf_msix * vf->vf_id + + q_vector->v_idx + 1; +} + +/** + * ice_get_max_valid_res_idx - Get the max valid resource index + * @res: pointer to the resource to find the max valid index for + * + * Start from the end of the ice_res_tracker and return right when we find the + * first res->list entry with the ICE_RES_VALID_BIT set. 
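The two index helpers above (ice_calc_vf_first_vector_idx() and ice_calc_vf_reg_idx()) reduce to simple arithmetic over the SR-IOV carve-out, with the +1 skipping the OICR vector that leads each VF's block. Lifted into a standalone demo; the example numbers are made up:

#include <stdio.h>

static int vf_first_vector(int msix_first_id, int sriov_base,
                           int num_vf_msix, int vf_id)
{
        /* absolute HW index of the VF's OICR vector */
        return msix_first_id + sriov_base + vf_id * num_vf_msix;
}

static int vf_reg_idx(int sriov_base, int num_vf_msix, int vf_id, int v_idx)
{
        /* +1 accounts for the OICR being the VF's first MSIX vector */
        return sriov_base + num_vf_msix * vf_id + v_idx + 1;
}

int main(void)
{
        /* e.g. 4 vectors per VF with the SR-IOV block starting at 96 */
        printf("VF2: first=%d, q_vector0 reg_idx=%d\n",
               vf_first_vector(0, 96, 4, 2), vf_reg_idx(96, 4, 2, 0));
        return 0;
}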
This function is only + * valid for SR-IOV because it is the only consumer that manipulates the + * res->end and this is always called when res->end is set to res->num_entries. + */ +static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) +{ + int i; + + if (!res) + return -EINVAL; + + for (i = res->num_entries - 1; i >= 0; i--) + if (res->list[i] & ICE_RES_VALID_BIT) + return i; + + return 0; +} + +/** + * ice_sriov_set_msix_res - Set any used MSIX resources + * @pf: pointer to PF structure + * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs + * + * This function allows SR-IOV resources to be taken from the end of the PF's + * allowed HW MSIX vectors so in many cases the irq_tracker will not + * be needed. In these cases we just set the pf->sriov_base_vector and return + * success. + * + * If SR-IOV needs to use any pf->irq_tracker entries it updates the + * irq_tracker->end based on the first entry needed for SR-IOV. This makes it + * so any calls to ice_get_res() using the irq_tracker will not try to use + * resources at or beyond the newly set value. + * + * Return 0 on success, and -EINVAL when there are not enough MSIX vectors in + * in the PF's space available for SR-IOV. + */ +static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) +{ + int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + u16 pf_total_msix_vectors = + pf->hw.func_caps.common_cap.num_msix_vectors; + struct ice_res_tracker *res = pf->irq_tracker; + int sriov_base_vector; + + if (max_valid_res_idx < 0) + return max_valid_res_idx; + + sriov_base_vector = pf_total_msix_vectors - num_msix_needed; + + /* make sure we only grab irq_tracker entries from the list end and + * that we have enough available MSIX vectors + */ + if (sriov_base_vector <= max_valid_res_idx) + return -EINVAL; + + pf->sriov_base_vector = sriov_base_vector; + + /* dip into irq_tracker entries and update used resources */ + if (num_msix_needed > (pf_total_msix_vectors - res->num_entries)) { + pf->num_avail_sw_msix -= + res->num_entries - pf->sriov_base_vector; + res->end = pf->sriov_base_vector; + } + + return 0; +} + +/** * ice_check_avail_res - check if vectors and queues are available * @pf: pointer to the PF structure * @@ -696,11 +820,16 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res) */ static int ice_check_avail_res(struct ice_pf *pf) { - u16 num_msix, num_txq, num_rxq; + int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + u16 num_msix, num_txq, num_rxq, num_avail_msix; - if (!pf->num_alloc_vfs) + if (!pf->num_alloc_vfs || max_valid_res_idx < 0) return -EINVAL; + /* add 1 to max_valid_res_idx to account for it being 0-based */ + num_avail_msix = pf->hw.func_caps.common_cap.num_msix_vectors - + (max_valid_res_idx + 1); + /* Grab from HW interrupts common pool * Note: By the time the user decides it needs more vectors in a VF * its already too late since one must decide this prior to creating the @@ -717,11 +846,11 @@ static int ice_check_avail_res(struct ice_pf *pf) * grab default interrupt vectors (5 as supported by AVF driver). 
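The carve-out in ice_sriov_set_msix_res() above, restated as a standalone sketch: take vectors from the top of the PF's MSIX space, and only trim the tracker when the carve-out reaches down into its range.

#include <stdint.h>
#include <stdio.h>

struct tracker { uint16_t num_entries; uint16_t end; };

static int sriov_set_msix(struct tracker *t, uint16_t *avail_msix,
                          uint16_t *sriov_base, uint16_t total_msix,
                          uint16_t needed, int max_used_idx)
{
        int base = total_msix - needed;

        /* the carve-out must sit above every in-use tracker entry */
        if (base <= max_used_idx)
                return -1;

        *sriov_base = base;

        if (needed > total_msix - t->num_entries) {
                /* dip into tracker entries and account for them */
                *avail_msix -= t->num_entries - base;
                t->end = base;
        }
        return 0;
}

int main(void)
{
        struct tracker t = { .num_entries = 64, .end = 64 };
        uint16_t avail = 40, base = 0;

        /* 96 total vectors, VFs need 48, highest used tracker index is 20 */
        if (!sriov_set_msix(&t, &avail, &base, 96, 48, 20))
                printf("base=%u end=%u avail=%u\n", base, t.end, avail);
        return 0;
}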
*/ if (pf->num_alloc_vfs <= 16) { - num_msix = ice_determine_res(pf, pf->num_avail_hw_msix, + num_msix = ice_determine_res(pf, num_avail_msix, ICE_MAX_INTR_PER_VF, ICE_MIN_INTR_PER_VF); } else if (pf->num_alloc_vfs <= ICE_MAX_VF_COUNT) { - num_msix = ice_determine_res(pf, pf->num_avail_hw_msix, + num_msix = ice_determine_res(pf, num_avail_msix, ICE_DFLT_INTR_PER_VF, ICE_MIN_INTR_PER_VF); } else { @@ -750,6 +879,9 @@ static int ice_check_avail_res(struct ice_pf *pf) if (!num_txq || !num_rxq) return -EIO; + if (ice_sriov_set_msix_res(pf, num_msix * pf->num_alloc_vfs)) + return -EINVAL; + /* since AVF driver works with only queue pairs which means, it expects * to have equal number of Rx and Tx queues, so take the minimum of * available Tx or Rx queues @@ -938,6 +1070,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) vf->num_vf_qs = 0; } + if (ice_sriov_free_msix_res(pf)) + dev_err(&pf->pdev->dev, + "Failed to free MSIX resources used by SR-IOV\n"); + if (ice_check_avail_res(pf)) { dev_err(&pf->pdev->dev, "Cannot allocate VF resources, try with fewer number of VFs\n"); @@ -1119,7 +1255,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) int i, ret; /* Disable global interrupt 0 so we don't try to handle the VFLR. */ - wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); ice_flush(hw); @@ -1134,7 +1270,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) GFP_KERNEL); if (!vfs) { ret = -ENOMEM; - goto err_unroll_sriov; + goto err_pci_disable_sriov; } pf->vf = vfs; @@ -1154,12 +1290,19 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) pf->num_alloc_vfs = num_alloc_vfs; /* VF resources get allocated during reset */ - if (!ice_reset_all_vfs(pf, true)) + if (!ice_reset_all_vfs(pf, true)) { + ret = -EIO; goto err_unroll_sriov; + } goto err_unroll_intr; err_unroll_sriov: + pf->vf = NULL; + devm_kfree(&pf->pdev->dev, vfs); + vfs = NULL; + pf->num_alloc_vfs = 0; +err_pci_disable_sriov: pci_disable_sriov(pf->pdev); err_unroll_intr: /* rearm interrupts here */ @@ -1168,8 +1311,8 @@ err_unroll_intr: } /** - * ice_pf_state_is_nominal - checks the pf for nominal state - * @pf: pointer to pf to check + * ice_pf_state_is_nominal - checks the PF for nominal state + * @pf: pointer to PF to check * * Check the PF's state for a collection of bits that would indicate * the PF is in a state that would inhibit normal operation for @@ -1496,7 +1639,7 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf) /** * ice_find_vsi_from_id - * @pf: the pf structure to search for the VSI + * @pf: the PF structure to search for the VSI * @id: ID of the VSI it is searching for * * searches for the VSI with the given ID @@ -1807,28 +1950,37 @@ error_param: static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_irq_map_info *irqmap_info = - (struct virtchnl_irq_map_info *)msg; + struct virtchnl_irq_map_info *irqmap_info; u16 vsi_id, vsi_q_id, vector_id; struct virtchnl_vector_map *map; - struct ice_vsi *vsi = NULL; struct ice_pf *pf = vf->pf; + u16 num_q_vectors_mapped; + struct ice_vsi *vsi; unsigned long qmap; - u16 num_q_vectors; int i; - num_q_vectors = irqmap_info->num_vectors - ICE_NONQ_VECS_VF; + irqmap_info = (struct virtchnl_irq_map_info *)msg; + num_q_vectors_mapped = irqmap_info->num_vectors; + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + /* 
Check to make sure number of VF vectors mapped is not greater than + * number of VF vectors originally allocated, and check that + * there is actually at least a single VF queue vector mapped + */ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - !vsi || vsi->num_q_vectors < num_q_vectors || - irqmap_info->num_vectors == 0) { + pf->num_vf_msix < num_q_vectors_mapped || + !irqmap_info->num_vectors) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - for (i = 0; i < num_q_vectors; i++) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; + for (i = 0; i < num_q_vectors_mapped; i++) { + struct ice_q_vector *q_vector; map = &irqmap_info->vecmap[i]; @@ -1836,7 +1988,21 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) vsi_id = map->vsi_id; /* validate msg params */ if (!(vector_id < pf->hw.func_caps.common_cap - .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id)) { + .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || + (!vector_id && (map->rxq_map || map->txq_map))) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* No need to map VF miscellaneous or rogue vector */ + if (!vector_id) + continue; + + /* Subtract the non-queue vector from vector_id passed by the VF + * to get the VSI q_vector array index + */ + q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF]; + if (!q_vector) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1852,6 +2018,8 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) q_vector->num_ring_rx++; q_vector->rx.itr_idx = map->rxitr_idx; vsi->rx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id, + q_vector->rx.itr_idx); } qmap = map->txq_map; @@ -1864,11 +2032,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) q_vector->num_ring_tx++; q_vector->tx.itr_idx = map->txitr_idx; vsi->tx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id, + q_vector->tx.itr_idx); } } - if (vsi) - ice_vsi_cfg_msix(vsi); error_param: /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret, @@ -1903,9 +2071,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi = pf->vsi[vf->lan_vsi_idx]; - if (!vsi) { + if (!vsi) goto error_param; - } if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF) { dev_err(&pf->pdev->dev, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 3725aea16840..c3ca522c245a 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -49,29 +49,34 @@ struct ice_vf { struct ice_pf *pf; s16 vf_id; /* VF ID in the PF space */ - u32 driver_caps; /* reported by VF driver */ + u16 lan_vsi_idx; /* index into PF struct */ int first_vector_idx; /* first vector index of this VF */ struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ struct virtchnl_version_info vf_ver; + u32 driver_caps; /* reported by VF driver */ struct virtchnl_ether_addr dflt_lan_addr; u16 port_vlan_id; - u8 pf_set_mac; /* VF MAC address set by VMM admin */ - u8 trusted; - u16 lan_vsi_idx; /* index into PF struct */ + u8 pf_set_mac:1; /* VF MAC address set by VMM admin */ + u8 trusted:1; + u8 spoofchk:1; + u8 link_forced:1; + u8 link_up:1; /* only valid if VF link is forced */ + /* VSI indices - actual VSI pointers are maintained in the PF structure. + * When assigned, these will be non-zero, because VSI 0 is always + * the main LAN VSI for the PF. 
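+ * A value of zero can therefore be read as 'not yet assigned'.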
+ */ u16 lan_vsi_num; /* ID as used by firmware */ + unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ + DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ + u64 num_mdd_events; /* number of MDD events detected */ u64 num_inval_msgs; /* number of continuous invalid msgs */ u64 num_valid_msgs; /* number of valid msgs detected */ unsigned long vf_caps; /* VF's adv. capabilities */ - DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ - unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ - u8 link_forced; - u8 link_up; /* only valid if VF link is forced */ - u8 spoofchk; + u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; u16 num_vlan; u16 num_vf_qs; /* num of queue configured per VF */ - u8 num_req_qs; /* num of queue pairs requested by VF */ }; #ifdef CONFIG_PCI_IOV @@ -96,6 +101,8 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); + +int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); #else /* CONFIG_PCI_IOV */ #define ice_process_vflr_event(pf) do {} while (0) #define ice_free_vfs(pf) do {} while (0) @@ -161,5 +168,11 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } +static inline int +ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, + struct ice_q_vector __always_unused *q_vector) +{ + return 0; +} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index bafdcf70a353..3ec2ce0725d5 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -638,7 +638,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) dev_spec->sgmii_active = true; break; } - /* fall through for I2C based SGMII */ + /* fall through - for I2C based SGMII */ case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: /* read media type from SFP EEPROM */ ret_val = igb_set_sfp_media_type_82575(hw); diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 0ad737d2f289..9cb49980ec2d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -409,6 +409,8 @@ do { \ #define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40)) #define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40)) +#define E1000_I210_RR2DCDELAY 0x5BF4 + #define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) #define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index c645d9e648e0..3182b059bf55 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -448,7 +448,7 @@ static void igb_set_msglevel(struct net_device *netdev, u32 data) static int igb_get_regs_len(struct net_device *netdev) { -#define IGB_REGS_LEN 739 +#define IGB_REGS_LEN 740 return IGB_REGS_LEN * sizeof(u32); } @@ -675,41 +675,44 @@ static void igb_get_regs(struct net_device *netdev, regs_buff[554] = adapter->stats.b2ogprc; } - if (hw->mac.type != e1000_82576) - return; - for (i = 0; i < 12; i++) - regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4)); - for (i = 0; i < 4; i++) - regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[571 + i] = 
rd32(E1000_RDBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[607 + i] = rd32(E1000_RDH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[619 + i] = rd32(E1000_RDT(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4)); - - for (i = 0; i < 12; i++) - regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[679 + i] = rd32(E1000_TDH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[691 + i] = rd32(E1000_TDT(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4)); + if (hw->mac.type == e1000_82576) { + for (i = 0; i < 12; i++) + regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4)); + for (i = 0; i < 4; i++) + regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[607 + i] = rd32(E1000_RDH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[619 + i] = rd32(E1000_RDT(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4)); + + for (i = 0; i < 12; i++) + regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[679 + i] = rd32(E1000_TDH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[691 + i] = rd32(E1000_TDT(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4)); + } + + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) + regs_buff[739] = rd32(E1000_I210_RR2DCDELAY); } static int igb_get_eeprom_len(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 39f33afc479c..b4df3e319467 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -753,6 +753,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg) struct net_device *netdev = igb->netdev; hw->hw_addr = NULL; netdev_err(netdev, "PCIe link lost\n"); + WARN(1, "igb: Failed to read reg 0x%x!\n", reg); } return value; @@ -2577,11 +2578,11 @@ static int igb_offload_cbs(struct igb_adapter *adapter, #define VLAN_PRIO_FULL_MASK (0x07) static int igb_parse_cls_flower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, int traffic_class, struct igb_nfc_filter *input) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; struct netlink_ext_ack *extack = f->common.extack; @@ -2659,7 +2660,7 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter, } static int 
igb_configure_clsflower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct netlink_ext_ack *extack = cls_flower->common.extack; struct igb_nfc_filter *filter, *f; @@ -2721,7 +2722,7 @@ err_parse: } static int igb_delete_clsflower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct igb_nfc_filter *filter; int err; @@ -2751,14 +2752,14 @@ out: } static int igb_setup_tc_cls_flower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return igb_configure_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return igb_delete_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -2782,25 +2783,6 @@ static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int igb_setup_tc_block(struct igb_adapter *adapter, - struct tc_block_offload *f) -{ - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, igb_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb, - adapter); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int igb_offload_txtime(struct igb_adapter *adapter, struct tc_etf_qopt_offload *qopt) { @@ -2824,6 +2806,8 @@ static int igb_offload_txtime(struct igb_adapter *adapter, return 0; } +static LIST_HEAD(igb_block_cb_list); + static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -2833,7 +2817,11 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); case TC_SETUP_BLOCK: - return igb_setup_tc_block(adapter, type_data); + return flow_block_cb_setup_simple(type_data, + &igb_block_cb_list, + igb_setup_tc_block_cb, + adapter, adapter, true); + case TC_SETUP_QDISC_ETF: return igb_offload_txtime(adapter, type_data); @@ -5687,6 +5675,7 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, */ if (tx_ring->launchtime_enable) { ts = ns_to_timespec64(first->skb->tstamp); + first->skb->tstamp = 0; context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; @@ -6695,7 +6684,7 @@ static int __igb_notify_dca(struct device *dev, void *data) igb_setup_dca(adapter); break; } - /* Fall Through since DCA is disabled. */ + /* Fall Through - since DCA is disabled. 
*/ case DCA_PROVIDER_REMOVE: if (adapter->flags & IGB_FLAG_DCA_ENABLED) { /* without this a class_device is left diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index 51a8b8769c67..59258d791106 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -10,50 +10,6 @@ #include "igc.h" /** - * igc_set_pcie_completion_timeout - set pci-e completion timeout - * @hw: pointer to the HW structure - */ -static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw) -{ - u32 gcr = rd32(IGC_GCR); - u16 pcie_devctl2; - s32 ret_val = 0; - - /* only take action if timeout value is defaulted to 0 */ - if (gcr & IGC_GCR_CMPL_TMOUT_MASK) - goto out; - - /* if capabilities version is type 1 we can write the - * timeout of 10ms to 200ms through the GCR register - */ - if (!(gcr & IGC_GCR_CAP_VER2)) { - gcr |= IGC_GCR_CMPL_TMOUT_10ms; - goto out; - } - - /* for version 2 capabilities we need to write the config space - * directly in order to set the completion timeout value for - * 16ms to 55ms - */ - ret_val = igc_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); - if (ret_val) - goto out; - - pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; - - ret_val = igc_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); -out: - /* disable completion timeout resend */ - gcr &= ~IGC_GCR_CMPL_TMOUT_RESEND; - - wr32(IGC_GCR, gcr); - - return ret_val; -} - -/** * igc_reset_hw_base - Reset hardware * @hw: pointer to the HW structure * @@ -72,11 +28,6 @@ static s32 igc_reset_hw_base(struct igc_hw *hw) if (ret_val) hw_dbg("PCI-E Master disable polling has failed.\n"); - /* set the completion timeout for interface */ - ret_val = igc_set_pcie_completion_timeout(hw); - if (ret_val) - hw_dbg("PCI-E Set completion timeout has failed.\n"); - hw_dbg("Masking off all interrupts\n"); wr32(IGC_IMC, 0xffffffff); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index a9a30268de59..fc0ccfe38a20 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -5,8 +5,8 @@ #define _IGC_DEFINES_H_ /* Number of Transmit and Receive Descriptors must be a multiple of 8 */ -#define REQ_TX_DESCRIPTOR_MULTIPLE 8 -#define REQ_RX_DESCRIPTOR_MULTIPLE 8 +#define REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define REQ_RX_DESCRIPTOR_MULTIPLE 8 #define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ @@ -29,12 +29,6 @@ /* Status of Master requests. */ #define IGC_STATUS_GIO_MASTER_ENABLE 0x00080000 -/* PCI Express Control */ -#define IGC_GCR_CMPL_TMOUT_MASK 0x0000F000 -#define IGC_GCR_CMPL_TMOUT_10ms 0x00001000 -#define IGC_GCR_CMPL_TMOUT_RESEND 0x00010000 -#define IGC_GCR_CAP_VER2 0x00040000 - /* Receive Address * Number of high/low register pairs in the RAR. The RAR (Receive Address * Registers) holds the directed and multicast addresses that we monitor. 
@@ -72,6 +66,9 @@ #define IGC_CONNSW_AUTOSENSE_EN 0x1 +/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ +#define MAX_JUMBO_FRAME_SIZE 0x2600 + /* PBA constants */ #define IGC_PBA_34K 0x0022 @@ -264,9 +261,6 @@ #define IGC_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ #define IGC_TCTL_MULR 0x10000000 /* Multiple request support */ -#define IGC_CT_SHIFT 4 -#define IGC_COLLISION_THRESHOLD 15 - /* Flow Control Constants */ #define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 #define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 @@ -398,7 +392,7 @@ #define IGC_MDIC_ERROR 0x40000000 #define IGC_MDIC_DEST 0x80000000 -#define IGC_N0_QUEUE -1 +#define IGC_N0_QUEUE -1 #define IGC_MAX_MAC_HDR_LEN 127 #define IGC_MAX_NETWORK_HDR_LEN 511 diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 7c88b7bd4799..1039a224ac80 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -114,11 +114,8 @@ struct igc_nvm_operations { struct igc_phy_operations { s32 (*acquire)(struct igc_hw *hw); - s32 (*check_polarity)(struct igc_hw *hw); s32 (*check_reset_block)(struct igc_hw *hw); s32 (*force_speed_duplex)(struct igc_hw *hw); - s32 (*get_cfg_done)(struct igc_hw *hw); - s32 (*get_cable_length)(struct igc_hw *hw); s32 (*get_phy_info)(struct igc_hw *hw); s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data); void (*release)(struct igc_hw *hw); diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index f7683d3ae47c..ba4646737288 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -8,7 +8,6 @@ #include "igc_hw.h" /* forward declaration */ -static s32 igc_set_default_fc(struct igc_hw *hw); static s32 igc_set_fc_watermarks(struct igc_hw *hw); /** @@ -96,13 +95,10 @@ s32 igc_setup_link(struct igc_hw *hw) goto out; /* If requested flow control is set to default, set flow control - * based on the EEPROM flow control settings. + * to both 'rx' and 'tx' pause frames. */ - if (hw->fc.requested_mode == igc_fc_default) { - ret_val = igc_set_default_fc(hw); - if (ret_val) - goto out; - } + if (hw->fc.requested_mode == igc_fc_default) + hw->fc.requested_mode = igc_fc_full; /* We want to save off the original Flow Control configuration just * in case we get disconnected and then reconnected into a different @@ -136,19 +132,6 @@ out: } /** - * igc_set_default_fc - Set flow control default values - * @hw: pointer to the HW structure - * - * Read the EEPROM for the default values for flow control and store the - * values. - */ -static s32 igc_set_default_fc(struct igc_hw *hw) -{ - hw->fc.requested_mode = igc_fc_full; - return 0; -} - -/** * igc_force_mac_fc - Force the MAC's flow control settings * @hw: pointer to the HW structure * diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 34fa0e60a780..93f3b4e6185b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -72,6 +72,27 @@ void igc_reset(struct igc_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct igc_hw *hw = &adapter->hw; + struct igc_fc_info *fc = &hw->fc; + u32 pba, hwm; + + /* Repartition PBA for greater than 9k MTU if required */ + pba = IGC_PBA_34K; + + /* flow control settings + * The high water mark must be low enough to fit one full frame + * after transmitting the pause frame. 
As such we must have enough + * space to allow for us to complete our current transmit and then + * receive the frame that is in progress from the link partner. + * Set it to: + * - the full Rx FIFO size minus one full Tx plus one full Rx frame + */ + hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); + + fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; + fc->pause_time = 0xFFFF; + fc->send_xon = 1; + fc->current_mode = fc->requested_mode; hw->mac.ops.reset_hw(hw); @@ -3934,6 +3955,7 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) hw->hw_addr = NULL; netif_device_detach(netdev); netdev_err(netdev, "PCIe link lost, device now detached\n"); + WARN(1, "igc: Failed to read reg 0x%x!\n", reg); } return value; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 08d85e336bd4..39e73ad60352 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -50,8 +50,6 @@ #define IXGBE_MAX_RXD 4096 #define IXGBE_MIN_RXD 64 -#define IXGBE_ETH_P_LLDP 0x88CC - /* flow control */ #define IXGBE_MIN_FCRTL 0x40 #define IXGBE_MAX_FCRTL 0x7FF80 @@ -635,6 +633,7 @@ struct ixgbe_adapter { /* XDP */ int num_xdp_queues; struct ixgbe_ring *xdp_ring[MAX_XDP_QUEUES]; + unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled rings */ /* TX */ struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; @@ -774,11 +773,6 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ - - /* AF_XDP zero-copy */ - struct xdp_umem **xsk_umems; - u16 num_xsk_umems_used; - u16 num_xsk_umems; }; static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) @@ -1039,4 +1033,10 @@ static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf) { return -EACCES; } #endif /* CONFIG_IXGBE_IPSEC */ + +static inline bool ixgbe_enabled_xdp_adapter(struct ixgbe_adapter *adapter) +{ + return !!adapter->xdp_prog; +} + #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index acba067cc15a..7c52ae8ac005 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3226,7 +3226,8 @@ static int ixgbe_get_module_info(struct net_device *dev, page_swap = true; } - if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap) { + if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap || + !(addr_mode & IXGBE_SFF_DDM_IMPLEMENTED)) { /* We have a SFP, but it does not support SFF-8472 */ modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index ff85ce5791a3..31629fc7e820 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -842,6 +842,9 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf) struct ixgbe_ipsec *ipsec = adapter->ipsec; int i; + if (!ipsec) + return; + /* search rx sa table */ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) { if (!ipsec->rx_tbl[i].used) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 57fd9ee6de66..cbaf712d6529 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6288,6 +6288,10 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, if (ixgbe_init_rss_key(adapter)) return -ENOMEM; + adapter->af_xdp_zc_qps = bitmap_zalloc(MAX_XDP_QUEUES, GFP_KERNEL); + if (!adapter->af_xdp_zc_qps) + return -ENOMEM; + /* Set MAC specific capability flags and exceptions */ switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -9603,27 +9607,6 @@ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int ixgbe_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb, - adapter); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int ixgbe_setup_tc_mqprio(struct net_device *dev, struct tc_mqprio_qopt *mqprio) { @@ -9631,12 +9614,19 @@ static int ixgbe_setup_tc_mqprio(struct net_device *dev, return ixgbe_setup_tc(dev, mqprio->num_tc); } +static LIST_HEAD(ixgbe_block_cb_list); + static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct ixgbe_adapter *adapter = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return ixgbe_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &ixgbe_block_cb_list, + ixgbe_setup_tc_block_cb, + adapter, adapter, true); case TC_SETUP_QDISC_MQPRIO: return ixgbe_setup_tc_mqprio(dev, type_data); default: @@ -11161,6 +11151,7 @@ err_sw_init: kfree(adapter->jump_tables[0]); kfree(adapter->mac_table); kfree(adapter->rss_key); + bitmap_free(adapter->af_xdp_zc_qps); err_ioremap: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); @@ -11249,6 +11240,7 @@ static void ixgbe_remove(struct pci_dev *pdev) kfree(adapter->mac_table); kfree(adapter->rss_key); + bitmap_free(adapter->af_xdp_zc_qps); disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 214b01085718..6544c4539c0d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -45,6 +45,7 @@ #define IXGBE_SFF_SOFT_RS_SELECT_10G 0x8 #define IXGBE_SFF_SOFT_RS_SELECT_1G 0x0 #define IXGBE_SFF_ADDRESSING_MODE 0x4 +#define IXGBE_SFF_DDM_IMPLEMENTED 0x40 #define IXGBE_SFF_QSFP_DA_ACTIVE_CABLE 0x1 #define IXGBE_SFF_QSFP_DA_PASSIVE_CABLE 0x8 #define IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE 0x23 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index d81a50dc9535..0be13a90ff79 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -72,13 +72,13 @@ #define IXGBE_INCPER_SHIFT_82599 24 #define IXGBE_OVERFLOW_PERIOD (HZ * 30) -#define IXGBE_PTP_TX_TIMEOUT (HZ * 15) +#define IXGBE_PTP_TX_TIMEOUT (HZ) -/* half of a one second clock period, for use with PPS signal. 
We have to use - * this instead of something pre-defined like IXGBE_PTP_PPS_HALF_SECOND, in - * order to force at least 64bits of precision for shifting +/* We use our own definitions instead of NSEC_PER_SEC because we want to mark + * the value as a ULL to force precision when bit shifting. */ -#define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL +#define NS_PER_SEC 1000000000ULL +#define NS_PER_HALF_SEC 500000000ULL /* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL * which contain measurements of seconds and nanoseconds respectively. This @@ -141,23 +141,26 @@ #define MAX_TIMADJ 0x7FFFFFFF /** - * ixgbe_ptp_setup_sdp_x540 + * ixgbe_ptp_setup_sdp_X540 * @adapter: private adapter structure * * this function enables or disables the clock out feature on SDP0 for - * the X540 device. It will create a 1second periodic output that can + * the X540 device. It will create a 1 second periodic output that can * be used as the PPS (via an interrupt). * - * It calculates when the systime will be on an exact second, and then - * aligns the start of the PPS signal to that value. The shift is - * necessary because it can change based on the link speed. + * It calculates when the system time will be on an exact second, and then + * aligns the start of the PPS signal to that value. + * + * This works by using the cycle counter shift and mult values in reverse, and + * assumes that the values we're shifting will not overflow. */ -static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_setup_sdp_X540(struct ixgbe_adapter *adapter) { + struct cyclecounter *cc = &adapter->hw_cc; struct ixgbe_hw *hw = &adapter->hw; - int shift = adapter->hw_cc.shift; u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem; - u64 ns = 0, clock_edge = 0; + u64 ns = 0, clock_edge = 0, clock_period; + unsigned long flags; /* disable the pin first */ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); @@ -177,26 +180,33 @@ static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) /* enable the Clock Out feature on SDP0, and allow * interrupts to occur when the pin changes */ - tsauxc = IXGBE_TSAUXC_EN_CLK | - IXGBE_TSAUXC_SYNCLK | - IXGBE_TSAUXC_SDP0_INT; + tsauxc = (IXGBE_TSAUXC_EN_CLK | + IXGBE_TSAUXC_SYNCLK | + IXGBE_TSAUXC_SDP0_INT); - /* clock period (or pulse length) */ - clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); - clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); - - /* Account for the cyclecounter wrap-around value by - * using the converted ns value of the current time to - * check for when the next aligned second would occur. + /* Determine the clock time period to use. This assumes that the + * cycle counter shift is small enough to avoid overflow. 
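+ * Put differently, the cyclecounter normally converts cycles to
+ * nanoseconds as ns = (cycles * mult) >> shift; running that in
+ * reverse, cycles = (ns << shift) / mult, yields the SYSTIME cycle
+ * count for half a second.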
*/ - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; - ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge); + clock_period = div_u64((NS_PER_HALF_SEC << cc->shift), cc->mult); + clktiml = (u32)(clock_period); + clktimh = (u32)(clock_period >> 32); - div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); - clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); + /* Read the current clock time, and save the cycle counter value */ + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_read(&adapter->hw_tc); + clock_edge = adapter->hw_tc.cycle_last; + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + /* Figure out how many seconds to add in order to round up */ + div_u64_rem(ns, NS_PER_SEC, &rem); + + /* Figure out how many nanoseconds to add to round the clock edge up + * to the next full second + */ + rem = (NS_PER_SEC - rem); - /* specify the initial clock start time */ + /* Adjust the clock edge to align with the next full second. */ + clock_edge += div_u64(((u64)rem << cc->shift), cc->mult); trgttiml = (u32)clock_edge; trgttimh = (u32)(clock_edge >> 32); @@ -212,8 +222,100 @@ static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) } /** + * ixgbe_ptp_setup_sdp_X550 + * @adapter: private adapter structure + * + * Enable or disable a clock output signal on SDP 0 for X550 hardware. + * + * Use the target time feature to align the output signal on the next full + * second. + * + * This works by using the cycle counter shift and mult values in reverse, and + * assumes that the values we're shifting will not overflow. + */ +static void ixgbe_ptp_setup_sdp_X550(struct ixgbe_adapter *adapter) +{ + u32 esdp, tsauxc, freqout, trgttiml, trgttimh, rem, tssdp; + struct cyclecounter *cc = &adapter->hw_cc; + struct ixgbe_hw *hw = &adapter->hw; + u64 ns = 0, clock_edge = 0; + struct timespec64 ts; + unsigned long flags; + + /* disable the pin first */ + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); + IXGBE_WRITE_FLUSH(hw); + + if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED)) + return; + + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + + /* enable the SDP0 pin as output, and connected to the + * native function for Timesync (ClockOut) + */ + esdp |= IXGBE_ESDP_SDP0_DIR | + IXGBE_ESDP_SDP0_NATIVE; + + /* enable the Clock Out feature on SDP0, and use Target Time 0 to + * enable generation of interrupts on the clock change. + */ +#define IXGBE_TSAUXC_DIS_TS_CLEAR 0x40000000 + tsauxc = (IXGBE_TSAUXC_EN_CLK | IXGBE_TSAUXC_ST0 | + IXGBE_TSAUXC_EN_TT0 | IXGBE_TSAUXC_SDP0_INT | + IXGBE_TSAUXC_DIS_TS_CLEAR); + + tssdp = (IXGBE_TSSDP_TS_SDP0_EN | + IXGBE_TSSDP_TS_SDP0_CLK0); + + /* Determine the clock time period to use. This assumes that the + * cycle counter shift is small enough to avoid overflowing a 32bit + * value. + */ + freqout = div_u64(NS_PER_HALF_SEC << cc->shift, cc->mult); + + /* Read the current clock time, and save the cycle counter value */ + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_read(&adapter->hw_tc); + clock_edge = adapter->hw_tc.cycle_last; + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + /* Figure out how far past the next second we are */ + div_u64_rem(ns, NS_PER_SEC, &rem); + + /* Figure out how many nanoseconds to add to round the clock edge up + * to the next full second + */ + rem = (NS_PER_SEC - rem); + + /* Adjust the clock edge to align with the next full second. 
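+ * (This is the same reverse mult/shift conversion used in the X540
+ * routine above: rem nanoseconds become SYSTIME cycle units.)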
*/ + clock_edge += div_u64(((u64)rem << cc->shift), cc->mult); + + /* X550 hardware stores the time in 32bits of 'billions of cycles' and + * 32bits of 'cycles'. There's no guarantee that cycles represents + * nanoseconds. However, we can use the math from a timespec64 to + * convert into the hardware representation. + * + * See ixgbe_ptp_read_X550() for more details. + */ + ts = ns_to_timespec64(clock_edge); + trgttiml = (u32)ts.tv_nsec; + trgttimh = (u32)ts.tv_sec; + + IXGBE_WRITE_REG(hw, IXGBE_FREQOUT0, freqout); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); + + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_REG(hw, IXGBE_TSSDP, tssdp); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); + + IXGBE_WRITE_FLUSH(hw); +} + +/** * ixgbe_ptp_read_X550 - read cycle counter value - * @hw_cc: cyclecounter structure + * @cc: cyclecounter structure * * This function reads SYSTIME registers. It is called by the cyclecounter * structure to convert from internal representation into nanoseconds. We need @@ -221,10 +323,10 @@ static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of * "cycles", rather than seconds and nanoseconds. */ -static u64 ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc) +static u64 ixgbe_ptp_read_X550(const struct cyclecounter *cc) { struct ixgbe_adapter *adapter = - container_of(hw_cc, struct ixgbe_adapter, hw_cc); + container_of(cc, struct ixgbe_adapter, hw_cc); struct ixgbe_hw *hw = &adapter->hw; struct timespec64 ts; @@ -838,6 +940,15 @@ void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector, ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); } +/** + * ixgbe_ptp_get_ts_config - get current hardware timestamping configuration + * @adapter: pointer to adapter structure + * @ifr: ioctl data + * + * This function returns the current timestamping settings. Rather than + * attempt to deconstruct registers to fill in the values, simply keep a copy + * of the old settings around, and return a copy when requested. 
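+ * (The stored copy is presumably refreshed by the corresponding set
+ * path whenever a new configuration is accepted; only the get side is
+ * shown here.)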
+ */ int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) { struct hwtstamp_config *config = &adapter->tstamp_config; @@ -1253,7 +1364,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; - adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X540; break; case ixgbe_mac_82599EB: snprintf(adapter->ptp_caps.name, @@ -1280,13 +1391,13 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_alarm = 0; adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; - adapter->ptp_caps.pps = 0; + adapter->ptp_caps.pps = 1; adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; - adapter->ptp_setup_sdp = NULL; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X550; break; default: adapter->ptp_clock = NULL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 345701af7749..537dfff585e0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1645,7 +1645,7 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_LLDP), (IXGBE_ETQF_FILTER_EN | IXGBE_ETQF_TX_ANTISPOOF | - IXGBE_ETH_P_LLDP)); + ETH_P_LLDP)); IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FC), (IXGBE_ETQF_FILTER_EN | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 84f2dba39e36..2be1c4c72435 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1067,6 +1067,7 @@ struct ixgbe_nvm_version { #define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ #define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ #define IXGBE_TSIM 0x08C68 /* TimeSync Interrupt Mask Register - RW */ +#define IXGBE_TSSDP 0x0003C /* TimeSync SDP Configuration Register - RW */ /* Diagnostic Registers */ #define IXGBE_RDSTATCTL 0x02C20 @@ -2240,11 +2241,18 @@ enum { #define IXGBE_RXDCTL_RLPML_EN 0x00008000 #define IXGBE_RXDCTL_VME 0x40000000 /* VLAN mode enable */ -#define IXGBE_TSAUXC_EN_CLK 0x00000004 -#define IXGBE_TSAUXC_SYNCLK 0x00000008 -#define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_EN_CLK 0x00000004 +#define IXGBE_TSAUXC_SYNCLK 0x00000008 +#define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_EN_TT0 0x00000001 +#define IXGBE_TSAUXC_EN_TT1 0x00000002 +#define IXGBE_TSAUXC_ST0 0x00000010 #define IXGBE_TSAUXC_DISABLE_SYSTIME 0x80000000 +#define IXGBE_TSSDP_TS_SDP0_SEL_MASK 0x000000C0 +#define IXGBE_TSSDP_TS_SDP0_CLK0 0x00000080 +#define IXGBE_TSSDP_TS_SDP0_EN 0x00000100 + #define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index bfe95ce0bd7f..6b609553329f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -14,57 +14,10 @@ struct xdp_umem *ixgbe_xsk_umem(struct 
ixgbe_adapter *adapter, bool xdp_on = READ_ONCE(adapter->xdp_prog); int qid = ring->ring_idx; - if (!adapter->xsk_umems || !adapter->xsk_umems[qid] || - qid >= adapter->num_xsk_umems || !xdp_on) + if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps)) return NULL; - return adapter->xsk_umems[qid]; -} - -static int ixgbe_alloc_xsk_umems(struct ixgbe_adapter *adapter) -{ - if (adapter->xsk_umems) - return 0; - - adapter->num_xsk_umems_used = 0; - adapter->num_xsk_umems = adapter->num_rx_queues; - adapter->xsk_umems = kcalloc(adapter->num_xsk_umems, - sizeof(*adapter->xsk_umems), - GFP_KERNEL); - if (!adapter->xsk_umems) { - adapter->num_xsk_umems = 0; - return -ENOMEM; - } - - return 0; -} - -static int ixgbe_add_xsk_umem(struct ixgbe_adapter *adapter, - struct xdp_umem *umem, - u16 qid) -{ - int err; - - err = ixgbe_alloc_xsk_umems(adapter); - if (err) - return err; - - adapter->xsk_umems[qid] = umem; - adapter->num_xsk_umems_used++; - - return 0; -} - -static void ixgbe_remove_xsk_umem(struct ixgbe_adapter *adapter, u16 qid) -{ - adapter->xsk_umems[qid] = NULL; - adapter->num_xsk_umems_used--; - - if (adapter->num_xsk_umems == 0) { - kfree(adapter->xsk_umems); - adapter->xsk_umems = NULL; - adapter->num_xsk_umems = 0; - } + return xdp_get_umem_from_qid(adapter->netdev, qid); } static int ixgbe_xsk_umem_dma_map(struct ixgbe_adapter *adapter, @@ -113,6 +66,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, struct xdp_umem *umem, u16 qid) { + struct net_device *netdev = adapter->netdev; struct xdp_umem_fq_reuse *reuseq; bool if_running; int err; @@ -120,12 +74,9 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, if (qid >= adapter->num_rx_queues) return -EINVAL; - if (adapter->xsk_umems) { - if (qid >= adapter->num_xsk_umems) - return -EINVAL; - if (adapter->xsk_umems[qid]) - return -EBUSY; - } + if (qid >= netdev->real_num_rx_queues || + qid >= netdev->real_num_tx_queues) + return -EINVAL; reuseq = xsk_reuseq_prepare(adapter->rx_ring[0]->count); if (!reuseq) @@ -138,14 +89,12 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, return err; if_running = netif_running(adapter->netdev) && - READ_ONCE(adapter->xdp_prog); + ixgbe_enabled_xdp_adapter(adapter); if (if_running) ixgbe_txrx_ring_disable(adapter, qid); - err = ixgbe_add_xsk_umem(adapter, umem, qid); - if (err) - return err; + set_bit(qid, adapter->af_xdp_zc_qps); if (if_running) { ixgbe_txrx_ring_enable(adapter, qid); @@ -161,20 +110,21 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) { + struct xdp_umem *umem; bool if_running; - if (!adapter->xsk_umems || qid >= adapter->num_xsk_umems || - !adapter->xsk_umems[qid]) + umem = xdp_get_umem_from_qid(adapter->netdev, qid); + if (!umem) return -EINVAL; if_running = netif_running(adapter->netdev) && - READ_ONCE(adapter->xdp_prog); + ixgbe_enabled_xdp_adapter(adapter); if (if_running) ixgbe_txrx_ring_disable(adapter, qid); - ixgbe_xsk_umem_dma_unmap(adapter, adapter->xsk_umems[qid]); - ixgbe_remove_xsk_umem(adapter, qid); + clear_bit(qid, adapter->af_xdp_zc_qps); + ixgbe_xsk_umem_dma_unmap(adapter, umem); if (if_running) ixgbe_txrx_ring_enable(adapter, qid); @@ -621,8 +571,9 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) union ixgbe_adv_tx_desc *tx_desc = NULL; struct ixgbe_tx_buffer *tx_bi; bool work_done = true; - u32 len, cmd_type; + struct xdp_desc desc; dma_addr_t dma; + u32 cmd_type; while (budget-- > 0) { if 
(unlikely(!ixgbe_desc_unused(xdp_ring)) || @@ -631,15 +582,18 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) break; } - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len)) + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) break; - dma_sync_single_for_device(xdp_ring->dev, dma, len, + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, DMA_BIDIRECTIONAL); tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use]; - tx_bi->bytecount = len; + tx_bi->bytecount = desc.len; tx_bi->xdpf = NULL; + tx_bi->gso_segs = 1; tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use); tx_desc->read.buffer_addr = cpu_to_le64(dma); @@ -648,10 +602,10 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DCMD_IFCS; - cmd_type |= len | IXGBE_TXD_CMD; + cmd_type |= desc.len | IXGBE_TXD_CMD; tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); tx_desc->read.olinfo_status = - cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT); + cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT); xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) @@ -704,7 +658,6 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, xsk_frames++; tx_bi->xdpf = NULL; - total_bytes += tx_bi->bytecount; tx_bi++; tx_desc++; @@ -753,7 +706,7 @@ int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid) if (qid >= adapter->num_xdp_queues) return -ENXIO; - if (!adapter->xsk_umems || !adapter->xsk_umems[qid]) + if (!adapter->xdp_ring[qid]->xsk_umem) return -ENXIO; ring = adapter->xdp_ring[qid]; diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 5399787e07af..54459b69c948 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -85,22 +85,16 @@ static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 link_speed = 0; - bool link_up; ethtool_link_ksettings_zero_link_mode(cmd, supported); ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full); cmd->base.autoneg = AUTONEG_DISABLE; cmd->base.port = -1; - hw->mac.get_link_status = 1; - hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - - if (link_up) { + if (adapter->link_up) { __u32 speed = SPEED_10000; - switch (link_speed) { + switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: speed = SPEED_10000; break; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d189ed247665..d2b41f9f87f8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1423,6 +1423,9 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector, */ /* what was last interrupt timeslice? 
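A zero result would turn the bytes/usec division below into a divide-by-zero, which is exactly what the early return added here guards against.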
*/ timepassed_us = q_vector->itr >> 2; + if (timepassed_us == 0) + return; + bytes_perint = bytes / timepassed_us; /* bytes/usec */ switch (itr_setting) { diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index cd3b81300cc7..d5ce49636548 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -508,9 +508,8 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw, vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr); } - ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, IXGBE_VFMAILBOX_SIZE); - - return 0; + return ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + IXGBE_VFMAILBOX_SIZE); } /** diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index c5dac6bd2be4..f660cc2b8258 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -64,7 +64,7 @@ struct orion_mdio_dev { void __iomem *regs; - struct clk *clk[3]; + struct clk *clk[4]; /* * If we have access to the error interrupt pin (which is * somewhat misnamed as it not only reflects internal errors @@ -321,11 +321,19 @@ static int orion_mdio_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(dev->clk); i++) { dev->clk[i] = of_clk_get(pdev->dev.of_node, i); + if (PTR_ERR(dev->clk[i]) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto out_clk; + } if (IS_ERR(dev->clk[i])) break; clk_prepare_enable(dev->clk[i]); } + if (!IS_ERR(of_clk_get(pdev->dev.of_node, ARRAY_SIZE(dev->clk)))) + dev_warn(&pdev->dev, "unsupported number of clocks, limiting to the first " + __stringify(ARRAY_SIZE(dev->clk)) "\n"); + dev->err_interrupt = platform_get_irq(pdev, 0); if (dev->err_interrupt > 0 && resource_size(r) < MVMDIO_ERR_INT_MASK + 4) { @@ -362,6 +370,7 @@ out_mdio: if (dev->err_interrupt > 0) writel(0, dev->regs + MVMDIO_ERR_INT_MASK); +out_clk: for (i = 0; i < ARRAY_SIZE(dev->clk); i++) { if (IS_ERR(dev->clk[i])) break; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 269bd73be1a0..895bfed26a8a 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -437,6 +437,7 @@ struct mvneta_port { struct device_node *dn; unsigned int tx_csum_limit; struct phylink *phylink; + struct phylink_config phylink_config; struct phy *comphy; struct mvneta_bm *bm_priv; @@ -1118,7 +1119,7 @@ static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu) SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size)); /* Fill entire long pool */ - num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC); + num = hwbm_pool_add(hwbm_pool, hwbm_pool->size); if (num != hwbm_pool->size) { WARN(1, "pool %d: %d of %d allocated\n", bm_pool->id, num, hwbm_pool->size); @@ -3356,9 +3357,11 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr) return 0; } -static void mvneta_validate(struct net_device *ndev, unsigned long *supported, +static void mvneta_validate(struct phylink_config *config, + unsigned long *supported, struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; @@ -3408,9 +3411,10 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported, phylink_helper_basex_speed(state); } -static int mvneta_mac_link_state(struct net_device *ndev, +static int mvneta_mac_link_state(struct phylink_config *config, struct phylink_link_state *state) { + struct 
net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 gmac_stat; @@ -3438,8 +3442,9 @@ static int mvneta_mac_link_state(struct net_device *ndev, return 1; } -static void mvneta_mac_an_restart(struct net_device *ndev) +static void mvneta_mac_an_restart(struct phylink_config *config) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); @@ -3449,9 +3454,10 @@ static void mvneta_mac_an_restart(struct net_device *ndev) gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN); } -static void mvneta_mac_config(struct net_device *ndev, unsigned int mode, - const struct phylink_link_state *state) +static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0); u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2); @@ -3581,9 +3587,10 @@ static void mvneta_set_eee(struct mvneta_port *pp, bool enable) mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi_ctl1); } -static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode, - phy_interface_t interface) +static void mvneta_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 val; @@ -3600,10 +3607,11 @@ static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode, mvneta_set_eee(pp, false); } -static void mvneta_mac_link_up(struct net_device *ndev, unsigned int mode, +static void mvneta_mac_link_up(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phy) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 val; @@ -4500,8 +4508,14 @@ static int mvneta_probe(struct platform_device *pdev) comphy = NULL; } - phylink = phylink_create(dev, pdev->dev.fwnode, phy_mode, - &mvneta_phylink_ops); + pp = netdev_priv(dev); + spin_lock_init(&pp->lock); + + pp->phylink_config.dev = &dev->dev; + pp->phylink_config.type = PHYLINK_NETDEV; + + phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode, + phy_mode, &mvneta_phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); goto err_free_irq; @@ -4513,8 +4527,6 @@ static int mvneta_probe(struct platform_device *pdev) dev->ethtool_ops = &mvneta_eth_tool_ops; - pp = netdev_priv(dev); - spin_lock_init(&pp->lock); pp->phylink = phylink; pp->comphy = comphy; pp->phy_interface = phy_mode; diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c index de468e1bdba9..82ee2bcca6fd 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.c +++ b/drivers/net/ethernet/marvell/mvneta_bm.c @@ -190,7 +190,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); hwbm_pool->construct = mvneta_bm_construct; hwbm_pool->priv = new_pool; - spin_lock_init(&hwbm_pool->lock); + mutex_init(&hwbm_pool->buf_lock); /* Create new pool */ err = mvneta_bm_pool_create(priv, new_pool); @@ -201,7 +201,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, } /* Allocate buffers for this pool */ - num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC); + num = hwbm_pool_add(hwbm_pool, hwbm_pool->size); if 
(num != hwbm_pool->size) { WARN(1, "pool %d: %d of %d allocated\n", new_pool->id, num, hwbm_pool->size); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 6171270a016c..4d9564ba68f6 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -148,6 +148,8 @@ #define MVPP22_CLS_C2_ATTR2 0x1b6c #define MVPP22_CLS_C2_ATTR2_RSS_EN BIT(30) #define MVPP22_CLS_C2_ATTR3 0x1b70 +#define MVPP22_CLS_C2_TCAM_CTRL 0x1b90 +#define MVPP22_CLS_C2_TCAM_BYPASS_FIFO BIT(0) /* Descriptor Manager Top Registers */ #define MVPP2_RXQ_NUM_REG 0x2040 @@ -327,8 +329,26 @@ #define MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK 0xff00 #define MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT 8 +/* Packet Processor per-port counters */ +#define MVPP2_OVERRUN_ETH_DROP 0x7000 +#define MVPP2_CLS_ETH_DROP 0x7020 + /* Hit counters registers */ #define MVPP2_CTRS_IDX 0x7040 +#define MVPP22_CTRS_TX_CTR(port, txq) ((txq) | ((port) << 3) | BIT(7)) +#define MVPP2_TX_DESC_ENQ_CTR 0x7100 +#define MVPP2_TX_DESC_ENQ_TO_DDR_CTR 0x7104 +#define MVPP2_TX_BUFF_ENQ_TO_DDR_CTR 0x7108 +#define MVPP2_TX_DESC_ENQ_HW_FWD_CTR 0x710c +#define MVPP2_RX_DESC_ENQ_CTR 0x7120 +#define MVPP2_TX_PKTS_DEQ_CTR 0x7130 +#define MVPP2_TX_PKTS_FULL_QUEUE_DROP_CTR 0x7200 +#define MVPP2_TX_PKTS_EARLY_DROP_CTR 0x7204 +#define MVPP2_TX_PKTS_BM_DROP_CTR 0x7208 +#define MVPP2_TX_PKTS_BM_MC_DROP_CTR 0x720c +#define MVPP2_RX_PKTS_FULL_QUEUE_DROP_CTR 0x7220 +#define MVPP2_RX_PKTS_EARLY_DROP_CTR 0x7224 +#define MVPP2_RX_PKTS_BM_DROP_CTR 0x7228 #define MVPP2_CLS_DEC_TBL_HIT_CTR 0x7700 #define MVPP2_CLS_FLOW_TBL_HIT_CTR 0x7704 @@ -624,6 +644,7 @@ #define MVPP2_N_RFS_RULES (MVPP2_N_RFS_ENTRIES_PER_FLOW * 7) /* RSS constants */ +#define MVPP22_N_RSS_TABLES 8 #define MVPP22_RSS_TABLE_ENTRIES 32 /* IPv6 max L3 address size */ @@ -725,6 +746,10 @@ enum mvpp2_prs_l3_cast { /* Definitions */ struct mvpp2_dbgfs_entries; +struct mvpp2_rss_table { + u32 indir[MVPP22_RSS_TABLE_ENTRIES]; +}; + /* Shared Packet Processor resources */ struct mvpp2 { /* Shared registers' base addresses */ @@ -788,6 +813,9 @@ struct mvpp2 { /* Debugfs entries private data */ struct mvpp2_dbgfs_entries *dbgfs_entries; + + /* RSS Indirection tables */ + struct mvpp2_rss_table *rss_tables[MVPP22_N_RSS_TABLES]; }; struct mvpp2_pcpu_stats { @@ -905,6 +933,7 @@ struct mvpp2_port { phy_interface_t phy_interface; struct phylink *phylink; + struct phylink_config phylink_config; struct phy *comphy; struct mvpp2_bm_pool *pool_long; @@ -919,12 +948,14 @@ struct mvpp2_port { u32 tx_time_coal; - /* RSS indirection table */ - u32 indir[MVPP22_RSS_TABLE_ENTRIES]; - /* List of steering rules active on that port */ - struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_RULES]; + struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_ENTRIES_PER_FLOW]; int n_rfs_rules; + + /* Each port has its own view of the rss contexts, so that it can number + * them from 0 + */ + int rss_ctx[MVPP22_N_RSS_TABLES]; }; /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index a57d17ab91f0..35478cba2aa5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -44,17 +44,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { /* TCP over IPv4 flows, Not fragmented, with vlan tag */ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG, - MVPP22_CLS_HEK_IP4_5T | 
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index a57d17ab91f0..35478cba2aa5 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -44,17 +44,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv4 flows, Not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -79,17 +79,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv4 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -114,17 +114,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv4 flows, Not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
@@ -149,17 +149,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv4 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
@@ -178,12 +178,12 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv6 flows, not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -202,13 +202,13 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv6 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -228,12 +228,12 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv6 flows, not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
@@ -252,13 +252,13 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv6 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
@@ -279,15 +279,15 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* IPv4 flows, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4,
               MVPP2_PRS_RI_L3_PROTO_MASK),
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT,
               MVPP2_PRS_RI_L3_PROTO_MASK),
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER,
               MVPP2_PRS_RI_L3_PROTO_MASK),
 
@@ -303,11 +303,11 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* IPv6 flows, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6,
               MVPP2_PRS_RI_L3_PROTO_MASK),
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6,
               MVPP2_PRS_RI_L3_PROTO_MASK),
 
@@ -548,6 +548,8 @@ void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
 static int mvpp2_cls_ethtool_flow_to_type(int flow_type)
 {
     switch (flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+    case ETHER_FLOW:
+        return MVPP22_FLOW_ETHERNET;
     case TCP_V4_FLOW:
         return MVPP22_FLOW_TCP4;
     case TCP_V6_FLOW:
@@ -596,7 +598,7 @@ static void mvpp2_cls_flow_init(struct mvpp2 *priv,
     mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2);
     mvpp2_cls_flow_port_id_sel(&fe, true);
-    mvpp2_cls_flow_lu_type_set(&fe, MVPP22_FLOW_ETHERNET);
+    mvpp2_cls_flow_lu_type_set(&fe, MVPP22_CLS_LU_TYPE_ALL);
 
     /* Add all ports */
     for (i = 0; i < MVPP2_MAX_PORTS; i++)
@@ -655,6 +657,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
         case MVPP22_CLS_HEK_OPT_VLAN:
             field_id = MVPP22_CLS_FIELD_VLAN;
             break;
+        case MVPP22_CLS_HEK_OPT_VLAN_PRI:
+            field_id = MVPP22_CLS_FIELD_VLAN_PRI;
+            break;
         case MVPP22_CLS_HEK_OPT_IP4SA:
             field_id = MVPP22_CLS_FIELD_IP4SA;
             break;
@@ -689,6 +694,10 @@ static int mvpp2_cls_hek_field_size(u32 field)
     switch (field) {
     case MVPP22_CLS_HEK_OPT_MAC_DA:
         return 48;
+    case MVPP22_CLS_HEK_OPT_VLAN:
+        return 12;
+    case MVPP22_CLS_HEK_OPT_VLAN_PRI:
+        return 3;
     case MVPP22_CLS_HEK_OPT_IP4SA:
     case MVPP22_CLS_HEK_OPT_IP4DA:
         return 32;
@@ -777,6 +786,9 @@ u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe)
         case MVPP22_CLS_FIELD_VLAN:
             hash_opts |= MVPP22_CLS_HEK_OPT_VLAN;
             break;
+        case MVPP22_CLS_FIELD_VLAN_PRI:
+            hash_opts |= MVPP22_CLS_HEK_OPT_VLAN_PRI;
+            break;
         case MVPP22_CLS_FIELD_L3_PROTO:
             hash_opts |= MVPP22_CLS_HEK_OPT_L3_PROTO;
             break;
@@ -861,7 +873,7 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
     /* Match on Lookup Type */
     c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK));
-    c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP22_FLOW_ETHERNET);
+    c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP22_CLS_LU_TYPE_ALL);
 
     /* Update RSS status after matching this entry */
     c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
@@ -923,6 +935,12 @@ void mvpp2_cls_init(struct mvpp2 *priv)
         mvpp2_cls_c2_write(priv, &c2);
     }
 
+    /* Disable the FIFO stages in the C2 engine, which are only used in
+     * BIST mode
+     */
+    mvpp2_write(priv, MVPP22_CLS_C2_TCAM_CTRL,
+            MVPP22_CLS_C2_TCAM_BYPASS_FIFO);
+
     mvpp2_cls_port_init_flows(priv);
 }
 
@@ -963,12 +981,22 @@ u32 mvpp2_cls_c2_hit_count(struct mvpp2 *priv, int c2_index)
     return mvpp2_read(priv, MVPP22_CLS_C2_HIT_CTR);
 }
 
-static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port, u32 ctx)
 {
     struct mvpp2_cls_c2_entry c2;
+    u8 qh, ql;
 
     mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+    /* The RxQ number is used to select the RSS table. In that case, we set
+     * it to be the ctx number.
+     */
+    qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+    ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+    c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+             MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
     c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
 
     mvpp2_cls_c2_write(port->priv, &c2);
@@ -977,22 +1005,45 @@
 static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
 {
     struct mvpp2_cls_c2_entry c2;
+    u8 qh, ql;
 
     mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+    /* Reset the default destination RxQ to the port's first rx queue. */
+    qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+    ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+    c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+             MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
     c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
 
     mvpp2_cls_c2_write(port->priv, &c2);
 }
 
-void mvpp22_port_rss_enable(struct mvpp2_port *port)
+static inline int mvpp22_rss_ctx(struct mvpp2_port *port, int port_rss_ctx)
+{
+    return port->rss_ctx[port_rss_ctx];
+}
+
+int mvpp22_port_rss_enable(struct mvpp2_port *port)
 {
-    mvpp2_rss_port_c2_enable(port);
+    if (mvpp22_rss_ctx(port, 0) < 0)
+        return -EINVAL;
+
+    mvpp2_rss_port_c2_enable(port, mvpp22_rss_ctx(port, 0));
+
+    return 0;
 }
 
-void mvpp22_port_rss_disable(struct mvpp2_port *port)
+int mvpp22_port_rss_disable(struct mvpp2_port *port)
 {
+    if (mvpp22_rss_ctx(port, 0) < 0)
+        return -EINVAL;
+
     mvpp2_rss_port_c2_disable(port);
+
+    return 0;
 }
 
 static void mvpp22_port_c2_lookup_disable(struct mvpp2_port *port, int entry)
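Both the enable and disable paths above split an 8-bit queue (or RSS
context) number into the ATTR0 "qhigh"/"qlow" halves. A hypothetical helper
condensing that split, with a worked example (the 3-bit low field follows
from the ">> 3"; the high-field width is taken on trust from the masks):

    static void mvpp2_c2_split_q(u32 q, u8 *qh, u8 *ql)
    {
        /* low 3 bits go to QLOW, the remainder to QHIGH */
        *qh = (q >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
        *ql = q & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
    }

    /* e.g. q = 13: qh = 1, ql = 5, since 13 == (1 << 3) | 5 */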
+ */ + c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK); + /* Mark packet as "forwarded to software", needed for RSS */ c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK); c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD_LOCK) | MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD_LOCK); - qh = ((act->queue.index + port->first_rxq) >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK; - ql = (act->queue.index + port->first_rxq) & MVPP22_CLS_C2_ATTR0_QLOW_MASK; + if (act->queue.ctx) { + /* Get the global ctx number */ + ctx = mvpp22_rss_ctx(port, act->queue.ctx); + if (ctx < 0) + return -EINVAL; + + qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK; + ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK; + } else { + qh = ((act->queue.index + port->first_rxq) >> 3) & + MVPP22_CLS_C2_ATTR0_QHIGH_MASK; + ql = (act->queue.index + port->first_rxq) & + MVPP22_CLS_C2_ATTR0_QLOW_MASK; + } c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) | MVPP22_CLS_C2_ATTR0_QLOW(ql); @@ -1140,6 +1213,9 @@ static int mvpp2_port_flt_rfs_rule_insert(struct mvpp2_port *port, if (!flow) return 0; + if ((rule->hek_fields & flow->supported_hash_opts) != rule->hek_fields) + continue; + index = MVPP2_CLS_FLT_C2_RFS(port->id, flow->flow_id, rule->loc); mvpp2_cls_flow_read(priv, index, &fe); @@ -1158,7 +1234,44 @@ static int mvpp2_port_flt_rfs_rule_insert(struct mvpp2_port *port, static int mvpp2_cls_c2_build_match(struct mvpp2_rfs_rule *rule) { struct flow_rule *flow = rule->flow; - int offs = 64; + int offs = 0; + + /* The order of insertion in C2 tcam must match the order in which + * the fields are found in the header + */ + if (flow_rule_match_key(flow, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(flow, &match); + if (match.mask->vlan_id) { + rule->hek_fields |= MVPP22_CLS_HEK_OPT_VLAN; + + rule->c2_tcam |= ((u64)match.key->vlan_id) << offs; + rule->c2_tcam_mask |= ((u64)match.mask->vlan_id) << offs; + + /* Don't update the offset yet */ + } + + if (match.mask->vlan_priority) { + rule->hek_fields |= MVPP22_CLS_HEK_OPT_VLAN_PRI; + + /* VLAN pri is always at offset 13 relative to the + * current offset + */ + rule->c2_tcam |= ((u64)match.key->vlan_priority) << + (offs + 13); + rule->c2_tcam_mask |= ((u64)match.mask->vlan_priority) << + (offs + 13); + } + + if (match.mask->vlan_dei) + return -EOPNOTSUPP; + + /* vlan id and prio always seem to take a full 16-bit slot in + * the Header Extracted Key. 
+ */ + offs += 16; + } if (flow_rule_match_key(flow, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; @@ -1166,18 +1279,18 @@ static int mvpp2_cls_c2_build_match(struct mvpp2_rfs_rule *rule) flow_rule_match_ports(flow, &match); if (match.mask->src) { rule->hek_fields |= MVPP22_CLS_HEK_OPT_L4SIP; - offs -= mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4SIP); rule->c2_tcam |= ((u64)ntohs(match.key->src)) << offs; rule->c2_tcam_mask |= ((u64)ntohs(match.mask->src)) << offs; + offs += mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4SIP); } if (match.mask->dst) { rule->hek_fields |= MVPP22_CLS_HEK_OPT_L4DIP; - offs -= mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4DIP); rule->c2_tcam |= ((u64)ntohs(match.key->dst)) << offs; rule->c2_tcam_mask |= ((u64)ntohs(match.mask->dst)) << offs; + offs += mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4DIP); } } @@ -1196,6 +1309,13 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule) if (act->id != FLOW_ACTION_QUEUE && act->id != FLOW_ACTION_DROP) return -EOPNOTSUPP; + /* When both an RSS context and an queue index are set, the index + * is considered as an offset to be added to the indirection table + * entries. We don't support this, so reject this rule. + */ + if (act->queue.ctx && act->queue.index) + return -EOPNOTSUPP; + /* For now, only use the C2 engine which has a HEK size limited to 64 * bits for TCAM matching. */ @@ -1212,7 +1332,7 @@ int mvpp2_ethtool_cls_rule_get(struct mvpp2_port *port, { struct mvpp2_ethtool_fs *efs; - if (rxnfc->fs.location >= MVPP2_N_RFS_RULES) + if (rxnfc->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW) return -EINVAL; efs = port->rfs_rules[rxnfc->fs.location]; @@ -1232,8 +1352,7 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port, struct mvpp2_ethtool_fs *efs, *old_efs; int ret = 0; - if (info->fs.location >= 4 || - info->fs.location < 0) + if (info->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW) return -EINVAL; efs = kzalloc(sizeof(*efs), GFP_KERNEL); @@ -1242,6 +1361,12 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port, input.fs = &info->fs; + /* We need to manually set the rss_ctx, since this info isn't present + * in info->fs + */ + if (info->fs.flow_type & FLOW_RSS) + input.rss_ctx = info->rss_context; + ethtool_rule = ethtool_rx_flow_rule_create(&input); if (IS_ERR(ethtool_rule)) { ret = PTR_ERR(ethtool_rule); @@ -1250,6 +1375,10 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port, efs->rule.flow = ethtool_rule->rule; efs->rule.flow_type = mvpp2_cls_ethtool_flow_to_type(info->fs.flow_type); + if (efs->rule.flow_type < 0) { + ret = efs->rule.flow_type; + goto clean_rule; + } ret = mvpp2_cls_rfs_parse_rule(&efs->rule); if (ret) @@ -1328,19 +1457,160 @@ static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq) return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs); } -void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table) +static void mvpp22_rss_fill_table(struct mvpp2_port *port, + struct mvpp2_rss_table *table, + u32 rss_ctx) { struct mvpp2 *priv = port->priv; int i; for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) { - u32 sel = MVPP22_RSS_INDEX_TABLE(table) | + u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) | MVPP22_RSS_INDEX_TABLE_ENTRY(i); mvpp2_write(priv, MVPP22_RSS_INDEX, sel); mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY, - mvpp22_rxfh_indir(port, port->indir[i])); + mvpp22_rxfh_indir(port, table->indir[i])); + } +} + +static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx) +{ + struct mvpp2 *priv = port->priv; + u32 ctx; + + /* Find 
+static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx)
+{
+    struct mvpp2 *priv = port->priv;
+    u32 ctx;
+
+    /* Find the first free RSS table */
+    for (ctx = 0; ctx < MVPP22_N_RSS_TABLES; ctx++) {
+        if (!priv->rss_tables[ctx])
+            break;
+    }
+
+    if (ctx == MVPP22_N_RSS_TABLES)
+        return -EINVAL;
+
+    priv->rss_tables[ctx] = kzalloc(sizeof(*priv->rss_tables[ctx]),
+                    GFP_KERNEL);
+    if (!priv->rss_tables[ctx])
+        return -ENOMEM;
+
+    *rss_ctx = ctx;
+
+    /* Set the table width: replace the whole classifier Rx queue number
+     * with the ones configured in RSS table entries.
+     */
+    mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(ctx));
+    mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+
+    mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
+    mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
+
+    return 0;
+}
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *port_ctx)
+{
+    u32 rss_ctx;
+    int ret, i;
+
+    ret = mvpp22_rss_context_create(port, &rss_ctx);
+    if (ret)
+        return ret;
+
+    /* Find the first available context number in the port, starting from
+     * 1. Context 0 on each port is reserved for the default context.
+     */
+    for (i = 1; i < MVPP22_N_RSS_TABLES; i++) {
+        if (port->rss_ctx[i] < 0)
+            break;
+    }
+
+    if (i == MVPP22_N_RSS_TABLES)
+        return -EINVAL;
+
+    port->rss_ctx[i] = rss_ctx;
+    *port_ctx = i;
+
+    return 0;
+}
+
+static struct mvpp2_rss_table *mvpp22_rss_table_get(struct mvpp2 *priv,
+                            int rss_ctx)
+{
+    if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+        return NULL;
+
+    return priv->rss_tables[rss_ctx];
+}
+
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 port_ctx)
+{
+    struct mvpp2 *priv = port->priv;
+    struct ethtool_rxnfc *rxnfc;
+    int i, rss_ctx, ret;
+
+    rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+
+    if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+        return -EINVAL;
+
+    /* Invalidate any active classification rules that use this context */
+    for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+        if (!port->rfs_rules[i])
+            continue;
+
+        rxnfc = &port->rfs_rules[i]->rxnfc;
+        if (!(rxnfc->fs.flow_type & FLOW_RSS) ||
+            rxnfc->rss_context != port_ctx)
+            continue;
+
+        ret = mvpp2_ethtool_cls_rule_del(port, rxnfc);
+        if (ret) {
+            netdev_warn(port->dev,
+                    "couldn't remove classification rule %d associated with this context\n",
+                    rxnfc->fs.location);
+        }
+    }
+
+    kfree(priv->rss_tables[rss_ctx]);
+
+    priv->rss_tables[rss_ctx] = NULL;
+    port->rss_ctx[port_ctx] = -1;
+
+    return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 port_ctx,
+                  const u32 *indir)
+{
+    int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+    struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+                                 rss_ctx);
+
+    if (!rss_table)
+        return -EINVAL;
+
+    memcpy(rss_table->indir, indir,
+           MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+    mvpp22_rss_fill_table(port, rss_table, rss_ctx);
+
+    return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 port_ctx,
+                  u32 *indir)
+{
+    int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+    struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+                                 rss_ctx);
+
+    if (!rss_table)
+        return -EINVAL;
+
+    memcpy(indir, rss_table->indir,
+           MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+    return 0;
+}
 
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
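With the get/set_rxfh_context ethtool hooks wired up in mvpp2_main.c further
down, these helpers can be exercised from userspace; assuming an ethtool
release with RSS-context support, something along these lines (the exact
option spelling depends on the ethtool version):

    # allocate a new RSS context on the port
    ethtool -X eth0 context new
    # steer a flow to context 1; combining a context with a queue index
    # is rejected by mvpp2_cls_rfs_parse_rule() above
    ethtool -N eth0 flow-type tcp4 dst-port 80 context 1
    # release the context again
    ethtool -X eth0 context 1 delete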
@@ -1424,32 +1694,32 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
     return 0;
 }
 
-void mvpp22_port_rss_init(struct mvpp2_port *port)
+int mvpp22_port_rss_init(struct mvpp2_port *port)
 {
-    struct mvpp2 *priv = port->priv;
-    int i;
+    struct mvpp2_rss_table *table;
+    u32 context = 0;
+    int i, ret;
 
-    /* Set the table width: replace the whole classifier Rx queue number
-     * with the ones configured in RSS table entries.
-     */
-    mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
-    mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+    for (i = 0; i < MVPP22_N_RSS_TABLES; i++)
+        port->rss_ctx[i] = -1;
 
-    /* The default RxQ is used as a key to select the RSS table to use.
-     * We use one RSS table per port.
-     */
-    mvpp2_write(priv, MVPP22_RSS_INDEX,
-            MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
-    mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
-            MVPP22_RSS_TABLE_POINTER(port->id));
+    ret = mvpp22_rss_context_create(port, &context);
+    if (ret)
+        return ret;
+
+    table = mvpp22_rss_table_get(port->priv, context);
+    if (!table)
+        return -EINVAL;
+
+    port->rss_ctx[0] = context;
 
     /* Configure the first table to evenly distribute the packets across
      * real Rx Queues. The table entries map a hash to a port Rx Queue.
      */
     for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
-        port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
+        table->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
 
-    mvpp22_rss_fill_table(port, port->id);
+    mvpp22_rss_fill_table(port, table, mvpp22_rss_ctx(port, 0));
 
     /* Configure default flows */
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_IP4, MVPP22_CLS_HEK_IP4_2T);
@@ -1458,4 +1728,6 @@ void mvpp22_port_rss_init(struct mvpp2_port *port)
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_TCP6, MVPP22_CLS_HEK_IP6_5T);
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP4, MVPP22_CLS_HEK_IP4_5T);
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP6, MVPP22_CLS_HEK_IP6_5T);
+
+    return 0;
 }
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
index 56b617375a65..8867f25afab4 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
@@ -33,15 +33,16 @@ enum mvpp2_cls_engine {
 };
 
 #define MVPP22_CLS_HEK_OPT_MAC_DA  BIT(0)
-#define MVPP22_CLS_HEK_OPT_VLAN        BIT(1)
-#define MVPP22_CLS_HEK_OPT_L3_PROTO    BIT(2)
-#define MVPP22_CLS_HEK_OPT_IP4SA   BIT(3)
-#define MVPP22_CLS_HEK_OPT_IP4DA   BIT(4)
-#define MVPP22_CLS_HEK_OPT_IP6SA   BIT(5)
-#define MVPP22_CLS_HEK_OPT_IP6DA   BIT(6)
-#define MVPP22_CLS_HEK_OPT_L4SIP   BIT(7)
-#define MVPP22_CLS_HEK_OPT_L4DIP   BIT(8)
-#define MVPP22_CLS_HEK_N_FIELDS        9
+#define MVPP22_CLS_HEK_OPT_VLAN_PRI    BIT(1)
+#define MVPP22_CLS_HEK_OPT_VLAN        BIT(2)
+#define MVPP22_CLS_HEK_OPT_L3_PROTO    BIT(3)
+#define MVPP22_CLS_HEK_OPT_IP4SA   BIT(4)
+#define MVPP22_CLS_HEK_OPT_IP4DA   BIT(5)
+#define MVPP22_CLS_HEK_OPT_IP6SA   BIT(6)
+#define MVPP22_CLS_HEK_OPT_IP6DA   BIT(7)
+#define MVPP22_CLS_HEK_OPT_L4SIP   BIT(8)
+#define MVPP22_CLS_HEK_OPT_L4DIP   BIT(9)
+#define MVPP22_CLS_HEK_N_FIELDS        10
 
 #define MVPP22_CLS_HEK_L4_OPTS (MVPP22_CLS_HEK_OPT_L4SIP | \
                  MVPP22_CLS_HEK_OPT_L4DIP)
@@ -59,8 +60,12 @@ enum mvpp2_cls_engine {
 #define MVPP22_CLS_HEK_IP6_5T  (MVPP22_CLS_HEK_IP6_2T | \
                  MVPP22_CLS_HEK_L4_OPTS)
 
+#define MVPP22_CLS_HEK_TAGGED  (MVPP22_CLS_HEK_OPT_VLAN | \
+                 MVPP22_CLS_HEK_OPT_VLAN_PRI)
+
 enum mvpp2_cls_field_id {
     MVPP22_CLS_FIELD_MAC_DA = 0x03,
+    MVPP22_CLS_FIELD_VLAN_PRI = 0x05,
     MVPP22_CLS_FIELD_VLAN = 0x06,
     MVPP22_CLS_FIELD_L3_PROTO = 0x0f,
     MVPP22_CLS_FIELD_IP4SA = 0x10,
@@ -180,6 +185,11 @@ enum mvpp2_prs_flow {
 /* LU Type defined for all engines, and specified in the flow table */
 #define MVPP2_CLS_LU_TYPE_MASK 0x3f
 
+enum mvpp2_cls_lu_type {
+    /* rule->loc is used as a lu-type for the entries 0 - 62. */
+    MVPP22_CLS_LU_TYPE_ALL = 63,
+};
+
 #define MVPP2_N_FLOWS  (MVPP2_FL_LAST - MVPP2_FL_START)
 
 struct mvpp2_cls_flow {
@@ -249,11 +259,18 @@ struct mvpp2_cls_lookup_entry {
     u32 data;
 };
 
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
-void mvpp22_port_rss_init(struct mvpp2_port *port);
+int mvpp22_port_rss_init(struct mvpp2_port *port);
+
+int mvpp22_port_rss_enable(struct mvpp2_port *port);
+int mvpp22_port_rss_disable(struct mvpp2_port *port);
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *rss_ctx);
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 rss_ctx);
 
-void mvpp22_port_rss_enable(struct mvpp2_port *port);
-void mvpp22_port_rss_disable(struct mvpp2_port *port);
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 rss_ctx,
+                  const u32 *indir);
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 rss_ctx,
+                  u32 *indir);
 
 int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index d8e5241097a9..c51f1d5b550b 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -56,9 +56,9 @@ static struct {
 /* The prototype is added here to be used in start_dev when using ACPI. This
  * will be removed once phylink is used for all modes (dt+ACPI).
  */
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
                  const struct phylink_link_state *state);
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
                   phy_interface_t interface, struct phy_device *phy);
 
 /* Queue modes */
@@ -1258,6 +1258,17 @@ static u64 mvpp2_read_count(struct mvpp2_port *port,
     return val;
 }
 
+/* Some counters are accessed indirectly by first writing an index to
+ * MVPP2_CTRS_IDX. The index can represent various resources depending on the
+ * register we access; it can be a hit counter for some classification tables,
+ * a counter specific to a rxq, a txq or a buffer pool.
+ */
+static u32 mvpp2_read_index(struct mvpp2 *priv, u32 index, u32 reg)
+{
+    mvpp2_write(priv, MVPP2_CTRS_IDX, index);
+    return mvpp2_read(priv, reg);
+}
+
 /* Due to the fact that software statistics and hardware statistics are, by
  * design, incremented at different moments in the chain of packet processing,
  * it is very likely that incoming packets could have been dropped after being
@@ -1267,7 +1278,7 @@
  * Hence, statistics gathered from userspace with ifconfig (software) and
  * ethtool (hardware) cannot be compared.
  */
-static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = {
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_mib_regs[] = {
     { MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true },
     { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
     { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
@@ -1297,31 +1308,114 @@ static const struct mvpp2_ethtool_counter mvpp2_ethtool_mib_regs[] = {
     { MVPP2_MIB_LATE_COLLISION, "late_collision" },
 };
 
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_port_regs[] = {
+    { MVPP2_OVERRUN_ETH_DROP, "rx_fifo_or_parser_overrun_drops" },
+    { MVPP2_CLS_ETH_DROP, "rx_classifier_drops" },
+};
+
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_txq_regs[] = {
+    { MVPP2_TX_DESC_ENQ_CTR, "txq_%d_desc_enqueue" },
+    { MVPP2_TX_DESC_ENQ_TO_DDR_CTR, "txq_%d_desc_enqueue_to_ddr" },
+    { MVPP2_TX_BUFF_ENQ_TO_DDR_CTR, "txq_%d_buff_enqueue_to_ddr" },
+    { MVPP2_TX_DESC_ENQ_HW_FWD_CTR, "txq_%d_desc_hardware_forwarded" },
+    { MVPP2_TX_PKTS_DEQ_CTR, "txq_%d_packets_dequeued" },
+    { MVPP2_TX_PKTS_FULL_QUEUE_DROP_CTR, "txq_%d_queue_full_drops" },
+    { MVPP2_TX_PKTS_EARLY_DROP_CTR, "txq_%d_packets_early_drops" },
+    { MVPP2_TX_PKTS_BM_DROP_CTR, "txq_%d_packets_bm_drops" },
+    { MVPP2_TX_PKTS_BM_MC_DROP_CTR, "txq_%d_packets_rep_bm_drops" },
+};
+
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_rxq_regs[] = {
+    { MVPP2_RX_DESC_ENQ_CTR, "rxq_%d_desc_enqueue" },
+    { MVPP2_RX_PKTS_FULL_QUEUE_DROP_CTR, "rxq_%d_queue_full_drops" },
+    { MVPP2_RX_PKTS_EARLY_DROP_CTR, "rxq_%d_packets_early_drops" },
+    { MVPP2_RX_PKTS_BM_DROP_CTR, "rxq_%d_packets_bm_drops" },
+};
+
+#define MVPP2_N_ETHTOOL_STATS(ntxqs, nrxqs) (ARRAY_SIZE(mvpp2_ethtool_mib_regs) + \
+                         ARRAY_SIZE(mvpp2_ethtool_port_regs) + \
+                         (ARRAY_SIZE(mvpp2_ethtool_txq_regs) * (ntxqs)) + \
+                         (ARRAY_SIZE(mvpp2_ethtool_rxq_regs) * (nrxqs)))
+
 static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
                       u8 *data)
 {
-    if (sset == ETH_SS_STATS) {
-        int i;
+    struct mvpp2_port *port = netdev_priv(netdev);
+    int i, q;
 
-        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-            strscpy(data + i * ETH_GSTRING_LEN,
-                mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN);
+    if (sset != ETH_SS_STATS)
+        return;
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_mib_regs); i++) {
+        strscpy(data, mvpp2_ethtool_mib_regs[i].string,
+            ETH_GSTRING_LEN);
+        data += ETH_GSTRING_LEN;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_port_regs); i++) {
+        strscpy(data, mvpp2_ethtool_port_regs[i].string,
+            ETH_GSTRING_LEN);
+        data += ETH_GSTRING_LEN;
+    }
+
+    for (q = 0; q < port->ntxqs; q++) {
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++) {
+            snprintf(data, ETH_GSTRING_LEN,
+                 mvpp2_ethtool_txq_regs[i].string, q);
+            data += ETH_GSTRING_LEN;
+        }
+    }
+
+    for (q = 0; q < port->nrxqs; q++) {
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++) {
+            snprintf(data, ETH_GSTRING_LEN,
+                 mvpp2_ethtool_rxq_regs[i].string,
+                 q);
+            data += ETH_GSTRING_LEN;
+        }
     }
 }
 
+static void mvpp2_read_stats(struct mvpp2_port *port)
+{
+    u64 *pstats;
+    int i, q;
+
+    pstats = port->ethtool_stats;
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_mib_regs); i++)
+        *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_mib_regs[i]);
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_port_regs); i++)
+        *pstats++ += mvpp2_read(port->priv,
+                    mvpp2_ethtool_port_regs[i].offset +
+                    4 * port->id);
+
+    for (q = 0; q < port->ntxqs; q++)
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++)
+            *pstats++ += mvpp2_read_index(port->priv,
+                              MVPP22_CTRS_TX_CTR(port->id, q),
+                              mvpp2_ethtool_txq_regs[i].offset);
+
+    /* Rxqs are numbered from 0 from the user standpoint, but not from the
+     * driver's. We need to add the port->first_rxq offset.
+     */
+    for (q = 0; q < port->nrxqs; q++)
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++)
+            *pstats++ += mvpp2_read_index(port->priv,
+                              port->first_rxq + q,
+                              mvpp2_ethtool_rxq_regs[i].offset);
+}
+
 static void mvpp2_gather_hw_statistics(struct work_struct *work)
 {
     struct delayed_work *del_work = to_delayed_work(work);
     struct mvpp2_port *port = container_of(del_work, struct mvpp2_port,
                            stats_work);
-    u64 *pstats;
-    int i;
 
     mutex_lock(&port->gather_stats_lock);
 
-    pstats = port->ethtool_stats;
-    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-        *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
+    mvpp2_read_stats(port);
 
     /* No need to read again the counters right after this function if it
      * was called asynchronously by the user (ie. use of ethtool).
@@ -1345,27 +1439,24 @@ static void mvpp2_ethtool_get_stats(struct net_device *dev,
 
     mutex_lock(&port->gather_stats_lock);
     memcpy(data, port->ethtool_stats,
-           sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));
+           sizeof(u64) * MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs));
     mutex_unlock(&port->gather_stats_lock);
 }
 
 static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
 {
+    struct mvpp2_port *port = netdev_priv(dev);
+
     if (sset == ETH_SS_STATS)
-        return ARRAY_SIZE(mvpp2_ethtool_regs);
+        return MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs);
 
     return -EOPNOTSUPP;
 }
 
 static void mvpp2_mac_reset_assert(struct mvpp2_port *port)
 {
-    unsigned int i;
     u32 val;
 
-    /* Read the GOP statistics to reset the hardware counters */
-    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-        mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
-
     val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) |
           MVPP2_GMAC_PORT_RESET_MASK;
     writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
@@ -3237,9 +3328,9 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
         struct phylink_link_state state = {
             .interface = port->phy_interface,
         };
-        mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
-        mvpp2_mac_link_up(port->dev, MLO_AN_INBAND, port->phy_interface,
-                  NULL);
+        mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state);
+        mvpp2_mac_link_up(&port->phylink_config, MLO_AN_INBAND,
+                  port->phy_interface, NULL);
     }
 
     netif_tx_start_all_queues(port->dev);
@@ -3954,7 +4045,7 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
         ret = mvpp2_ethtool_cls_rule_get(port, info);
         break;
     case ETHTOOL_GRXCLSRLALL:
-        for (i = 0; i < MVPP2_N_RFS_RULES; i++) {
+        for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
             if (port->rfs_rules[i])
                 rules[loc++] = i;
         }
@@ -4000,24 +4091,25 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir,
                   u8 *key, u8 *hfunc)
 {
     struct mvpp2_port *port = netdev_priv(dev);
+    int ret = 0;
 
     if (!mvpp22_rss_is_supported())
         return -EOPNOTSUPP;
 
     if (indir)
-        memcpy(indir, port->indir,
-               ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+        ret = mvpp22_port_rss_ctx_indir_get(port, 0, indir);
 
     if (hfunc)
         *hfunc = ETH_RSS_HASH_CRC32;
 
-    return 0;
+    return ret;
 }
 
 static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
                   const u8 *key, const u8 hfunc)
 {
     struct mvpp2_port *port = netdev_priv(dev);
+    int ret = 0;
 
     if (!mvpp22_rss_is_supported())
         return -EOPNOTSUPP;
@@ -4028,15 +4120,58 @@ static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
     if (key)
         return -EOPNOTSUPP;
 
-    if (indir) {
-        memcpy(port->indir, indir,
-               ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
-        mvpp22_rss_fill_table(port, port->id);
-    }
+    if (indir)
+        ret = mvpp22_port_rss_ctx_indir_set(port, 0, indir);
 
-    return 0;
+    return ret;
+}
+
+static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
+                      u8 *key, u8 *hfunc, u32 rss_context)
+{
+    struct mvpp2_port *port = netdev_priv(dev);
+    int ret = 0;
+
+    if (!mvpp22_rss_is_supported())
+        return -EOPNOTSUPP;
+
+    if (hfunc)
+        *hfunc = ETH_RSS_HASH_CRC32;
+
+    if (indir)
+        ret = mvpp22_port_rss_ctx_indir_get(port, rss_context, indir);
+
+    return ret;
 }
 
+static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev,
+                      const u32 *indir, const u8 *key,
+                      const u8 hfunc, u32 *rss_context,
+                      bool delete)
+{
+    struct mvpp2_port *port = netdev_priv(dev);
+    int ret;
+
+    if (!mvpp22_rss_is_supported())
+        return -EOPNOTSUPP;
+
+    if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+        return -EOPNOTSUPP;
+
+    if (key)
+        return -EOPNOTSUPP;
+
+    if (delete)
+        return mvpp22_port_rss_ctx_delete(port, *rss_context);
+
+    if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+        ret = mvpp22_port_rss_ctx_create(port, rss_context);
+        if (ret)
+            return ret;
+    }
+
+    return mvpp22_port_rss_ctx_indir_set(port, *rss_context, indir);
+}
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -4073,7 +4208,8 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
     .get_rxfh_indir_size    = mvpp2_ethtool_get_rxfh_indir_size,
     .get_rxfh       = mvpp2_ethtool_get_rxfh,
     .set_rxfh       = mvpp2_ethtool_set_rxfh,
-
+    .get_rxfh_context   = mvpp2_ethtool_get_rxfh_context,
+    .set_rxfh_context   = mvpp2_ethtool_set_rxfh_context,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -4327,6 +4463,11 @@ static int mvpp2_port_init(struct mvpp2_port *port)
     if (err)
         goto err_free_percpu;
 
+    /* Clear all port stats */
+    mvpp2_read_stats(port);
+    memset(port->ethtool_stats, 0,
+           MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs) * sizeof(u64));
+
     return 0;
 
 err_free_percpu:
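The port-init hunk above does one throwaway mvpp2_read_stats() pass so the
clear-on-read hardware counters are latched and reset (the removed
mac_reset_assert() loop relied on the same property), then zeroes the
software accumulator so the first real dump starts from zero. The flat
buffer it clears uses one fixed layout that get_strings(),
mvpp2_read_stats() and get_stats() must all agree on, sized by
MVPP2_N_ETHTOOL_STATS():

    /* [ MIB regs | per-port drop regs | ntxqs x txq regs | nrxqs x rxq regs ] */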
@@ -4416,11 +4557,12 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
         eth_hw_addr_random(dev);
 }
 
-static void mvpp2_phylink_validate(struct net_device *dev,
+static void mvpp2_phylink_validate(struct phylink_config *config,
                    unsigned long *supported,
                    struct phylink_link_state *state)
 {
-    struct mvpp2_port *port = netdev_priv(dev);
+    struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+                           phylink_config);
     __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
     /* Invalid combinations */
@@ -4544,10 +4686,11 @@ static void mvpp2_gmac_link_state(struct mvpp2_port *port,
         state->pause |= MLO_PAUSE_TX;
 }
 
-static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+static int mvpp2_phylink_mac_link_state(struct phylink_config *config,
                     struct phylink_link_state *state)
 {
-    struct mvpp2_port *port = netdev_priv(dev);
+    struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+                           phylink_config);
 
     if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
         u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4563,9 +4706,10 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev,
     return 1;
 }
 
-static void mvpp2_mac_an_restart(struct net_device *dev)
+static void mvpp2_mac_an_restart(struct phylink_config *config)
 {
-    struct mvpp2_port *port = netdev_priv(dev);
+    struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+                           phylink_config);
     u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 
     writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
@@ -4750,9 +4894,10 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
     }
 }
 
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
                  const struct phylink_link_state *state)
 {
+    struct net_device *dev = to_net_dev(config->dev);
     struct mvpp2_port *port = netdev_priv(dev);
     bool change_interface = port->phy_interface != state->interface;
 
@@ -4792,9 +4937,10 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
     mvpp2_port_enable(port);
 }
 
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
                   phy_interface_t interface, struct phy_device *phy)
 {
+    struct net_device *dev = to_net_dev(config->dev);
     struct mvpp2_port *port = netdev_priv(dev);
     u32 val;
 
@@ -4819,9 +4965,10 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
     netif_tx_wake_all_queues(dev);
 }
 
-static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
-                phy_interface_t interface)
+static void mvpp2_mac_link_down(struct phylink_config *config,
+                unsigned int mode, phy_interface_t interface)
 {
+    struct net_device *dev = to_net_dev(config->dev);
     struct mvpp2_port *port = netdev_priv(dev);
     u32 val;
 
@@ -5002,7 +5149,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
     }
 
     port->ethtool_stats = devm_kcalloc(&pdev->dev,
-                       ARRAY_SIZE(mvpp2_ethtool_regs),
+                       MVPP2_N_ETHTOOL_STATS(ntxqs, nrxqs),
                        sizeof(u64), GFP_KERNEL);
     if (!port->ethtool_stats) {
         err = -ENOMEM;
@@ -5078,8 +5225,11 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
     /* Phylink isn't used w/ ACPI as of now */
     if (port_node) {
-        phylink = phylink_create(dev, port_fwnode, phy_mode,
-                     &mvpp2_phylink_ops);
+        port->phylink_config.dev = &dev->dev;
+        port->phylink_config.type = PHYLINK_NETDEV;
+
+        phylink = phylink_create(&port->phylink_config, port_fwnode,
+                     phy_mode, &mvpp2_phylink_ops);
         if (IS_ERR(phylink)) {
             err = PTR_ERR(phylink);
             goto err_free_port_pcpu;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index ae2240074d8e..5692c6087bbb 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -312,7 +312,8 @@ static void mvpp2_prs_sram_shift_set(struct mvpp2_prs_entry *pe, int shift,
     }
 
     /* Set value */
-    pe->sram[MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_SHIFT_OFFS)] = shift & MVPP2_PRS_SRAM_SHIFT_MASK;
+    pe->sram[MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_SHIFT_OFFS)] |=
+        shift & MVPP2_PRS_SRAM_SHIFT_MASK;
 
     /* Reset and set operation */
     mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_SHIFT_OFFS,
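The mvpp2_prs.c hunk is a subtle fix: the shift value shares a packed SRAM
word with other fields, so plain assignment wiped out its neighbours. A
minimal illustration (the 0xf00 neighbour bits are invented for the
example):

    u32 word = 0xf00;               /* other fields already set */

    word  = shift & MVPP2_PRS_SRAM_SHIFT_MASK;  /* old: clobbers 0xf00 */
    word |= shift & MVPP2_PRS_SRAM_SHIFT_MASK;  /* new: preserves it */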
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index d41a2414c575..2d8362f9341b 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -3,4 +3,5 @@
 # Makefile for the Mediatek SoCs built-in ethernet macs
 #
 
-obj-$(CONFIG_NET_MEDIATEK_SOC)          += mtk_eth_soc.o
+obj-$(CONFIG_NET_MEDIATEK_SOC)          += mtk_eth.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c
new file mode 100644
index 000000000000..7f05880cf9ef
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018-2019 MediaTek Inc.
+
+/* A library for configuring the path from GMAC/GDM to the target PHY
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/phy.h>
+#include <linux/regmap.h>
+
+#include "mtk_eth_soc.h"
+
+struct mtk_eth_muxc {
+    const char  *name;
+    int     cap_bit;
+    int     (*set_path)(struct mtk_eth *eth, int path);
+};
+
+static const char *mtk_eth_path_name(int path)
+{
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_RGMII:
+        return "gmac1_rgmii";
+    case MTK_ETH_PATH_GMAC1_TRGMII:
+        return "gmac1_trgmii";
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        return "gmac1_sgmii";
+    case MTK_ETH_PATH_GMAC2_RGMII:
+        return "gmac2_rgmii";
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        return "gmac2_sgmii";
+    case MTK_ETH_PATH_GMAC2_GEPHY:
+        return "gmac2_gephy";
+    case MTK_ETH_PATH_GDM1_ESW:
+        return "gdm1_esw";
+    default:
+        return "unknown path";
+    }
+}
+
+static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
+{
+    bool updated = true;
+    u32 val, mask, set;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        mask = ~(u32)MTK_MUX_TO_ESW;
+        set = 0;
+        break;
+    case MTK_ETH_PATH_GDM1_ESW:
+        mask = ~(u32)MTK_MUX_TO_ESW;
+        set = MTK_MUX_TO_ESW;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated) {
+        val = mtk_r32(eth, MTK_MAC_MISC);
+        val = (val & mask) | set;
+        mtk_w32(eth, val, MTK_MAC_MISC);
+    }
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_gmac2_gmac0_to_gephy(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC2_GEPHY:
+        val = ~(u32)GEPHY_MAC_SEL;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->infra, INFRA_MISC2, GEPHY_MAC_SEL, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        val = CO_QPHY_SEL;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->infra, INFRA_MISC2, CO_QPHY_SEL, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        val = SYSCFG0_SGMII_GMAC1;
+        break;
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        val = SYSCFG0_SGMII_GMAC2;
+        break;
+    case MTK_ETH_PATH_GMAC1_RGMII:
+    case MTK_ETH_PATH_GMAC2_RGMII:
+        regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+        val &= SYSCFG0_SGMII_MASK;
+
+        if ((path == MTK_ETH_PATH_GMAC1_RGMII && val == SYSCFG0_SGMII_GMAC1) ||
+            (path == MTK_ETH_PATH_GMAC2_RGMII && val == SYSCFG0_SGMII_GMAC2))
+            val = 0;
+        else
+            updated = false;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+                   SYSCFG0_SGMII_MASK, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        val |= SYSCFG0_SGMII_GMAC1_V2;
+        break;
+    case MTK_ETH_PATH_GMAC2_GEPHY:
+        val &= ~(u32)SYSCFG0_SGMII_GMAC2_V2;
+        break;
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        val |= SYSCFG0_SGMII_GMAC2_V2;
+        break;
+    default:
+        updated = false;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+                   SYSCFG0_SGMII_MASK, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static const struct mtk_eth_muxc mtk_eth_muxc[] = {
+    {
+        .name = "mux_gdm1_to_gmac1_esw",
+        .cap_bit = MTK_ETH_MUX_GDM1_TO_GMAC1_ESW,
+        .set_path = set_mux_gdm1_to_gmac1_esw,
+    }, {
+        .name = "mux_gmac2_gmac0_to_gephy",
+        .cap_bit = MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY,
+        .set_path = set_mux_gmac2_gmac0_to_gephy,
+    }, {
+        .name = "mux_u3_gmac2_to_qphy",
+        .cap_bit = MTK_ETH_MUX_U3_GMAC2_TO_QPHY,
+        .set_path = set_mux_u3_gmac2_to_qphy,
+    }, {
+        .name = "mux_gmac1_gmac2_to_sgmii_rgmii",
+        .cap_bit = MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII,
+        .set_path = set_mux_gmac1_gmac2_to_sgmii_rgmii,
+    }, {
+        .name = "mux_gmac12_to_gephy_sgmii",
+        .cap_bit = MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII,
+        .set_path = set_mux_gmac12_to_gephy_sgmii,
+    },
+};
+
+static int mtk_eth_mux_setup(struct mtk_eth *eth, int path)
+{
+    int i, err = 0;
+
+    if (!MTK_HAS_CAPS(eth->soc->caps, path)) {
+        dev_err(eth->dev, "path %s isn't supported on the SoC\n",
+            mtk_eth_path_name(path));
+        return -EINVAL;
+    }
+
+    if (!MTK_HAS_CAPS(eth->soc->caps, MTK_MUX))
+        return 0;
+
+    /* Setup MUX in path fabric */
+    for (i = 0; i < ARRAY_SIZE(mtk_eth_muxc); i++) {
+        if (MTK_HAS_CAPS(eth->soc->caps, mtk_eth_muxc[i].cap_bit)) {
+            err = mtk_eth_muxc[i].set_path(eth, path);
+            if (err)
+                goto out;
+        } else {
+            dev_dbg(eth->dev, "mux %s isn't present on the SoC\n",
+                mtk_eth_muxc[i].name);
+        }
+    }
+
+out:
+    return err;
+}
+
+static int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id)
+{
+    unsigned int val = 0;
+    int sid, err, path;
+
+    path = (mac_id == 0) ? MTK_ETH_PATH_GMAC1_SGMII :
+                   MTK_ETH_PATH_GMAC2_SGMII;
+
+    /* Setup proper MUXes along the path */
+    err = mtk_eth_mux_setup(eth, path);
+    if (err)
+        return err;
+
+    /* The path from GMAC to SGMII will be enabled once the SGMIISYS setup
+     * is done.
+     */
+    regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+
+    regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+               SYSCFG0_SGMII_MASK, ~(u32)SYSCFG0_SGMII_MASK);
+
+    /* Decide how GMAC and SGMIISYS are mapped */
+    sid = (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_SGMII)) ? 0 : mac_id;
+
+    /* Setup SGMIISYS with the determined property */
+    if (MTK_HAS_FLAGS(eth->sgmii->flags[sid], MTK_SGMII_PHYSPEED_AN))
+        err = mtk_sgmii_setup_mode_an(eth->sgmii, sid);
+    else
+        err = mtk_sgmii_setup_mode_force(eth->sgmii, sid);
+
+    if (err)
+        return err;
+
+    regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+               SYSCFG0_SGMII_MASK, val);
+
+    return 0;
+}
+
+static int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id)
+{
+    int err, path = 0;
+
+    if (mac_id == 1)
+        path = MTK_ETH_PATH_GMAC2_GEPHY;
+
+    if (!path)
+        return -EINVAL;
+
+    /* Setup proper MUXes along the path */
+    err = mtk_eth_mux_setup(eth, path);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+static int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id)
+{
+    int err, path;
+
+    path = (mac_id == 0) ? MTK_ETH_PATH_GMAC1_RGMII :
+                   MTK_ETH_PATH_GMAC2_RGMII;
+
+    /* Setup proper MUXes along the path */
+    err = mtk_eth_mux_setup(eth, path);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+int mtk_setup_hw_path(struct mtk_eth *eth, int mac_id, int phymode)
+{
+    int err;
+
+    switch (phymode) {
+    case PHY_INTERFACE_MODE_TRGMII:
+    case PHY_INTERFACE_MODE_RGMII_TXID:
+    case PHY_INTERFACE_MODE_RGMII_RXID:
+    case PHY_INTERFACE_MODE_RGMII_ID:
+    case PHY_INTERFACE_MODE_RGMII:
+    case PHY_INTERFACE_MODE_MII:
+    case PHY_INTERFACE_MODE_REVMII:
+    case PHY_INTERFACE_MODE_RMII:
+        if (MTK_HAS_CAPS(eth->soc->caps, MTK_RGMII)) {
+            err = mtk_gmac_rgmii_path_setup(eth, mac_id);
+            if (err)
+                return err;
+        }
+        break;
+    case PHY_INTERFACE_MODE_SGMII:
+        if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+            err = mtk_gmac_sgmii_path_setup(eth, mac_id);
+            if (err)
+                return err;
+        }
+        break;
+    case PHY_INTERFACE_MODE_GMII:
+        if (MTK_HAS_CAPS(eth->soc->caps, MTK_GEPHY)) {
+            err = mtk_gmac_gephy_path_setup(eth, mac_id);
+            if (err)
+                return err;
+        }
+        break;
+    default:
+        break;
+    }
+
+    return 0;
+}
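mtk_setup_hw_path() is the single entry point the core driver uses for all
of the mux plumbing above; the caller side, visible in the mtk_phy_connect()
hunk below, reduces to:

    err = mtk_setup_hw_path(eth, mac->id, of_get_phy_mode(np));
    if (err)
        goto err_phy;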
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 6cfffb64cd51..b20b3a5a1ebb 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -48,8 +48,10 @@ static const struct mtk_ethtool_stats {
 };
 
 static const char * const mtk_clks_source_name[] = {
-    "ethif", "esw", "gp0", "gp1", "gp2", "trgpll", "sgmii_tx250m",
-    "sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll"
+    "ethif", "sgmiitop", "esw", "gp0", "gp1", "gp2", "fe", "trgpll",
+    "sgmii_tx250m", "sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb",
+    "sgmii2_tx250m", "sgmii2_rx250m", "sgmii2_cdr_ref", "sgmii2_cdr_fb",
+    "sgmii_ck", "eth2pll",
 };
 
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
@@ -132,6 +134,31 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
     return _mtk_mdio_read(eth, phy_addr, phy_reg);
 }
 
+static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
+                     phy_interface_t interface)
+{
+    u32 val;
+
+    /* Check DDR memory type.
+     * Currently TRGMII mode with DDR2 memory is not supported.
+     */
+    regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val);
+    if (interface == PHY_INTERFACE_MODE_TRGMII &&
+        val & SYSCFG_DRAM_TYPE_DDR2) {
+        dev_err(eth->dev,
+            "TRGMII mode with DDR2 memory is not supported!\n");
+        return -EOPNOTSUPP;
+    }
+
+    val = (interface == PHY_INTERFACE_MODE_TRGMII) ?
+        ETHSYS_TRGMII_MT7621_DDR_PLL : 0;
+
+    regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
+               ETHSYS_TRGMII_MT7621_MASK, val);
+
+    return 0;
+}
+
 static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
 {
     u32 val;
@@ -159,47 +186,6 @@ static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
     mtk_w32(eth, val, TRGMII_TCK_CTRL);
 }
 
-static void mtk_gmac_sgmii_hw_setup(struct mtk_eth *eth, int mac_id)
-{
-    u32 val;
-
-    /* Setup the link timer and QPHY power up inside SGMIISYS */
-    regmap_write(eth->sgmiisys, SGMSYS_PCS_LINK_TIMER,
-             SGMII_LINK_TIMER_DEFAULT);
-
-    regmap_read(eth->sgmiisys, SGMSYS_SGMII_MODE, &val);
-    val |= SGMII_REMOTE_FAULT_DIS;
-    regmap_write(eth->sgmiisys, SGMSYS_SGMII_MODE, val);
-
-    regmap_read(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, &val);
-    val |= SGMII_AN_RESTART;
-    regmap_write(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, val);
-
-    regmap_read(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, &val);
-    val &= ~SGMII_PHYA_PWD;
-    regmap_write(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, val);
-
-    /* Determine MUX for which GMAC uses the SGMII interface */
-    if (MTK_HAS_CAPS(eth->soc->caps, MTK_DUAL_GMAC_SHARED_SGMII)) {
-        regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
-        val &= ~SYSCFG0_SGMII_MASK;
-        val |= !mac_id ? SYSCFG0_SGMII_GMAC1 : SYSCFG0_SGMII_GMAC2;
-        regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
-
-        dev_info(eth->dev, "setup shared sgmii for gmac=%d\n",
-             mac_id);
-    }
-
-    /* Setup the GMAC1 going through SGMII path when SoC also support
-     * ESW on GMAC1
-     */
-    if (MTK_HAS_CAPS(eth->soc->caps, MTK_GMAC1_ESW | MTK_GMAC1_SGMII) &&
-        !mac_id) {
-        mtk_w32(eth, 0, MTK_MAC_MISC);
-        dev_info(eth->dev, "setup gmac1 going through sgmii");
-    }
-}
-
 static void mtk_phy_link_adjust(struct net_device *dev)
 {
     struct mtk_mac *mac = netdev_priv(dev);
@@ -222,9 +208,17 @@ static void mtk_phy_link_adjust(struct net_device *dev)
         break;
     }
 
-    if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) &&
-        !mac->id && !mac->trgmii)
-        mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
+    if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) && !mac->id) {
+        if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_TRGMII_MT7621_CLK)) {
+            if (mt7621_gmac0_rgmii_adjust(mac->hw,
+                              dev->phydev->interface))
+                return;
+        } else {
+            if (!mac->trgmii)
+                mtk_gmac0_rgmii_adjust(mac->hw,
+                               dev->phydev->speed);
+        }
+    }
 
     if (dev->phydev->link)
         mcr |= MAC_MCR_FORCE_LINK;
@@ -289,6 +283,7 @@ static int mtk_phy_connect(struct net_device *dev)
     struct mtk_eth *eth;
     struct device_node *np;
     u32 val;
+    int err;
 
     eth = mac->hw;
     np = of_parse_phandle(mac->of_node, "phy-handle", 0);
@@ -298,6 +293,10 @@ static int mtk_phy_connect(struct net_device *dev)
     if (!np)
         return -ENODEV;
 
+    err = mtk_setup_hw_path(eth, mac->id, of_get_phy_mode(np));
+    if (err)
+        goto err_phy;
+
     mac->ge_mode = 0;
     switch (of_get_phy_mode(np)) {
     case PHY_INTERFACE_MODE_TRGMII:
@@ -306,12 +305,10 @@ static int mtk_phy_connect(struct net_device *dev)
     case PHY_INTERFACE_MODE_RGMII_RXID:
     case PHY_INTERFACE_MODE_RGMII_ID:
     case PHY_INTERFACE_MODE_RGMII:
-        break;
     case PHY_INTERFACE_MODE_SGMII:
-        if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII))
-            mtk_gmac_sgmii_hw_setup(eth, mac->id);
         break;
     case PHY_INTERFACE_MODE_MII:
+    case PHY_INTERFACE_MODE_GMII:
         mac->ge_mode = 1;
         break;
     case PHY_INTERFACE_MODE_REVMII:
@@ -2477,16 +2474,28 @@ static int mtk_probe(struct platform_device *pdev)
         return PTR_ERR(eth->ethsys);
     }
 
-    if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
-        eth->sgmiisys =
-            syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-                            "mediatek,sgmiisys");
-        if (IS_ERR(eth->sgmiisys)) {
-            dev_err(&pdev->dev, "no sgmiisys regmap found\n");
-            return PTR_ERR(eth->sgmiisys);
+    if (MTK_HAS_CAPS(eth->soc->caps, MTK_INFRA)) {
+        eth->infra = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+                                 "mediatek,infracfg");
+        if (IS_ERR(eth->infra)) {
+            dev_err(&pdev->dev, "no infracfg regmap found\n");
+            return PTR_ERR(eth->infra);
         }
     }
 
+    if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+        eth->sgmii = devm_kzalloc(eth->dev, sizeof(*eth->sgmii),
+                      GFP_KERNEL);
+        if (!eth->sgmii)
+            return -ENOMEM;
+
+        err = mtk_sgmii_init(eth->sgmii, pdev->dev.of_node,
+                     eth->soc->ana_rgc3);
+
+        if (err)
+            return err;
+    }
+
     if (eth->soc->required_pctl) {
         eth->pctl = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
                                 "mediatek,pctl");
@@ -2625,34 +2634,43 @@ static int mtk_remove(struct platform_device *pdev)
 }
 
 static const struct mtk_soc_data mt2701_data = {
-    .caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
+    .caps = MT7623_CAPS | MTK_HWLRO,
     .required_clks = MT7623_CLKS_BITMAP,
     .required_pctl = true,
 };
 
 static const struct mtk_soc_data mt7621_data = {
-    .caps = MTK_SHARED_INT,
+    .caps = MT7621_CAPS,
     .required_clks = MT7621_CLKS_BITMAP,
     .required_pctl = false,
 };
 
 static const struct mtk_soc_data mt7622_data = {
-    .caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW | MTK_HWLRO,
+    .ana_rgc3 = 0x2028,
+    .caps = MT7622_CAPS | MTK_HWLRO,
     .required_clks = MT7622_CLKS_BITMAP,
     .required_pctl = false,
 };
 
 static const struct mtk_soc_data mt7623_data = {
-    .caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
+    .caps = MT7623_CAPS | MTK_HWLRO,
     .required_clks = MT7623_CLKS_BITMAP,
     .required_pctl = true,
 };
 
+static const struct mtk_soc_data mt7629_data = {
+    .ana_rgc3 = 0x128,
+    .caps = MT7629_CAPS | MTK_HWLRO,
+    .required_clks = MT7629_CLKS_BITMAP,
+    .required_pctl = false,
+};
+
 const struct of_device_id of_mtk_match[] = {
     { .compatible = "mediatek,mt2701-eth", .data = &mt2701_data},
     { .compatible = "mediatek,mt7621-eth", .data = &mt7621_data},
     { .compatible = "mediatek,mt7622-eth", .data = &mt7622_data},
     { .compatible = "mediatek,mt7623-eth", .data = &mt7623_data},
+    { .compatible = "mediatek,mt7629-eth", .data = &mt7629_data},
     {},
 };
 MODULE_DEVICE_TABLE(of, of_mtk_match);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index baa85d5601e7..c6be599ed94d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -9,6 +9,10 @@
 #ifndef MTK_ETH_H
 #define MTK_ETH_H
 
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/of_net.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 
 #define MTK_QDMA_PAGE_SIZE  2048
@@ -359,17 +363,27 @@
 #define MT7622_ETH  7622
 #define MT7621_ETH  7621
 
+/* ethernet system control register */
+#define ETHSYS_SYSCFG       0x10
+#define SYSCFG_DRAM_TYPE_DDR2   BIT(4)
+
 /* ethernet subsystem config register */
 #define ETHSYS_SYSCFG0      0x14
 #define SYSCFG0_GE_MASK     0x3
 #define SYSCFG0_GE_MODE(x, y)   (x << (12 + (y * 2)))
-#define SYSCFG0_SGMII_MASK  (3 << 8)
-#define SYSCFG0_SGMII_GMAC1 ((2 << 8) & GENMASK(9, 8))
-#define SYSCFG0_SGMII_GMAC2 ((3 << 8) & GENMASK(9, 8))
+#define SYSCFG0_SGMII_MASK  GENMASK(9, 8)
+#define SYSCFG0_SGMII_GMAC1 ((2 << 8) & SYSCFG0_SGMII_MASK)
+#define SYSCFG0_SGMII_GMAC2 ((3 << 8) & SYSCFG0_SGMII_MASK)
+#define SYSCFG0_SGMII_GMAC1_V2  BIT(9)
+#define SYSCFG0_SGMII_GMAC2_V2  BIT(8)
 
 /* ethernet subsystem clock register */
 #define ETHSYS_CLKCFG0      0x2c
 #define ETHSYS_TRGMII_CLK_SEL362_5  BIT(11)
+#define ETHSYS_TRGMII_MT7621_MASK   (BIT(5) | BIT(6))
ETHSYS_TRGMII_MT7621_APLL BIT(6) +#define ETHSYS_TRGMII_MT7621_DDR_PLL BIT(5) /* ethernet reset control register */ #define ETHSYS_RSTCTRL 0x34 @@ -393,6 +407,11 @@ #define SGMSYS_QPHY_PWR_STATE_CTRL 0xe8 #define SGMII_PHYA_PWD BIT(4) +/* Infrasys subsystem config registers */ +#define INFRA_MISC2 0x70c +#define CO_QPHY_SEL BIT(0) +#define GEPHY_MAC_SEL BIT(1) + struct mtk_rx_dma { unsigned int rxd1; unsigned int rxd2; @@ -457,15 +476,21 @@ enum mtk_tx_flags { */ enum mtk_clks_map { MTK_CLK_ETHIF, + MTK_CLK_SGMIITOP, MTK_CLK_ESW, MTK_CLK_GP0, MTK_CLK_GP1, MTK_CLK_GP2, + MTK_CLK_FE, MTK_CLK_TRGPLL, MTK_CLK_SGMII_TX_250M, MTK_CLK_SGMII_RX_250M, MTK_CLK_SGMII_CDR_REF, MTK_CLK_SGMII_CDR_FB, + MTK_CLK_SGMII2_TX_250M, + MTK_CLK_SGMII2_RX_250M, + MTK_CLK_SGMII2_CDR_REF, + MTK_CLK_SGMII2_CDR_FB, MTK_CLK_SGMII_CK, MTK_CLK_ETH2PLL, MTK_CLK_MAX @@ -484,6 +509,19 @@ enum mtk_clks_map { BIT(MTK_CLK_SGMII_CK) | \ BIT(MTK_CLK_ETH2PLL)) #define MT7621_CLKS_BITMAP (0) +#define MT7629_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \ + BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \ + BIT(MTK_CLK_GP2) | BIT(MTK_CLK_FE) | \ + BIT(MTK_CLK_SGMII_TX_250M) | \ + BIT(MTK_CLK_SGMII_RX_250M) | \ + BIT(MTK_CLK_SGMII_CDR_REF) | \ + BIT(MTK_CLK_SGMII_CDR_FB) | \ + BIT(MTK_CLK_SGMII2_TX_250M) | \ + BIT(MTK_CLK_SGMII2_RX_250M) | \ + BIT(MTK_CLK_SGMII2_CDR_REF) | \ + BIT(MTK_CLK_SGMII2_CDR_FB) | \ + BIT(MTK_CLK_SGMII_CK) | \ + BIT(MTK_CLK_ETH2PLL) | BIT(MTK_CLK_SGMIITOP)) enum mtk_dev_state { MTK_HW_INIT, @@ -554,21 +592,120 @@ struct mtk_rx_ring { u32 crx_idx_reg; }; -#define MTK_TRGMII BIT(0) -#define MTK_GMAC1_TRGMII (BIT(1) | MTK_TRGMII) -#define MTK_ESW BIT(4) -#define MTK_GMAC1_ESW (BIT(5) | MTK_ESW) -#define MTK_SGMII BIT(8) -#define MTK_GMAC1_SGMII (BIT(9) | MTK_SGMII) -#define MTK_GMAC2_SGMII (BIT(10) | MTK_SGMII) -#define MTK_DUAL_GMAC_SHARED_SGMII (BIT(11) | MTK_GMAC1_SGMII | \ - MTK_GMAC2_SGMII) -#define MTK_HWLRO BIT(12) -#define MTK_SHARED_INT BIT(13) +enum mkt_eth_capabilities { + MTK_RGMII_BIT = 0, + MTK_TRGMII_BIT, + MTK_SGMII_BIT, + MTK_ESW_BIT, + MTK_GEPHY_BIT, + MTK_MUX_BIT, + MTK_INFRA_BIT, + MTK_SHARED_SGMII_BIT, + MTK_HWLRO_BIT, + MTK_SHARED_INT_BIT, + MTK_TRGMII_MT7621_CLK_BIT, + + /* MUX BITS*/ + MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT, + MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT, + MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT, + MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT, + MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT, + + /* PATH BITS */ + MTK_ETH_PATH_GMAC1_RGMII_BIT, + MTK_ETH_PATH_GMAC1_TRGMII_BIT, + MTK_ETH_PATH_GMAC1_SGMII_BIT, + MTK_ETH_PATH_GMAC2_RGMII_BIT, + MTK_ETH_PATH_GMAC2_SGMII_BIT, + MTK_ETH_PATH_GMAC2_GEPHY_BIT, + MTK_ETH_PATH_GDM1_ESW_BIT, +}; + +/* Supported hardware group on SoCs */ +#define MTK_RGMII BIT(MTK_RGMII_BIT) +#define MTK_TRGMII BIT(MTK_TRGMII_BIT) +#define MTK_SGMII BIT(MTK_SGMII_BIT) +#define MTK_ESW BIT(MTK_ESW_BIT) +#define MTK_GEPHY BIT(MTK_GEPHY_BIT) +#define MTK_MUX BIT(MTK_MUX_BIT) +#define MTK_INFRA BIT(MTK_INFRA_BIT) +#define MTK_SHARED_SGMII BIT(MTK_SHARED_SGMII_BIT) +#define MTK_HWLRO BIT(MTK_HWLRO_BIT) +#define MTK_SHARED_INT BIT(MTK_SHARED_INT_BIT) +#define MTK_TRGMII_MT7621_CLK BIT(MTK_TRGMII_MT7621_CLK_BIT) + +#define MTK_ETH_MUX_GDM1_TO_GMAC1_ESW \ + BIT(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT) +#define MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY \ + BIT(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT) +#define MTK_ETH_MUX_U3_GMAC2_TO_QPHY \ + BIT(MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT) +#define MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \ + BIT(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT) +#define 
MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII \ + BIT(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT) + +/* Supported path present on SoCs */ +#define MTK_ETH_PATH_GMAC1_RGMII BIT(MTK_ETH_PATH_GMAC1_RGMII_BIT) +#define MTK_ETH_PATH_GMAC1_TRGMII BIT(MTK_ETH_PATH_GMAC1_TRGMII_BIT) +#define MTK_ETH_PATH_GMAC1_SGMII BIT(MTK_ETH_PATH_GMAC1_SGMII_BIT) +#define MTK_ETH_PATH_GMAC2_RGMII BIT(MTK_ETH_PATH_GMAC2_RGMII_BIT) +#define MTK_ETH_PATH_GMAC2_SGMII BIT(MTK_ETH_PATH_GMAC2_SGMII_BIT) +#define MTK_ETH_PATH_GMAC2_GEPHY BIT(MTK_ETH_PATH_GMAC2_GEPHY_BIT) +#define MTK_ETH_PATH_GDM1_ESW BIT(MTK_ETH_PATH_GDM1_ESW_BIT) + +#define MTK_GMAC1_RGMII (MTK_ETH_PATH_GMAC1_RGMII | MTK_RGMII) +#define MTK_GMAC1_TRGMII (MTK_ETH_PATH_GMAC1_TRGMII | MTK_TRGMII) +#define MTK_GMAC1_SGMII (MTK_ETH_PATH_GMAC1_SGMII | MTK_SGMII) +#define MTK_GMAC2_RGMII (MTK_ETH_PATH_GMAC2_RGMII | MTK_RGMII) +#define MTK_GMAC2_SGMII (MTK_ETH_PATH_GMAC2_SGMII | MTK_SGMII) +#define MTK_GMAC2_GEPHY (MTK_ETH_PATH_GMAC2_GEPHY | MTK_GEPHY) +#define MTK_GDM1_ESW (MTK_ETH_PATH_GDM1_ESW | MTK_ESW) + +/* MUXes present on SoCs */ +/* 0: GDM1 -> GMAC1, 1: GDM1 -> ESW */ +#define MTK_MUX_GDM1_TO_GMAC1_ESW (MTK_ETH_MUX_GDM1_TO_GMAC1_ESW | MTK_MUX) + +/* 0: GMAC2 -> GEPHY, 1: GMAC0 -> GEPHY */ +#define MTK_MUX_GMAC2_GMAC0_TO_GEPHY \ + (MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY | MTK_MUX | MTK_INFRA) + +/* 0: U3 -> QPHY, 1: GMAC2 -> QPHY */ +#define MTK_MUX_U3_GMAC2_TO_QPHY \ + (MTK_ETH_MUX_U3_GMAC2_TO_QPHY | MTK_MUX | MTK_INFRA) + +/* 2: GMAC1 -> SGMII, 3: GMAC2 -> SGMII */ +#define MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \ + (MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII | MTK_MUX | \ + MTK_SHARED_SGMII) + +/* 0: GMACx -> GEPHY, 1: GMACx -> SGMII where x is 1 or 2 */ +#define MTK_MUX_GMAC12_TO_GEPHY_SGMII \ + (MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII | MTK_MUX) + #define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x)) +#define MT7621_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | \ + MTK_GMAC2_RGMII | MTK_SHARED_INT | MTK_TRGMII_MT7621_CLK) + +#define MT7622_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_SGMII | MTK_GMAC2_RGMII | \ + MTK_GMAC2_SGMII | MTK_GDM1_ESW | \ + MTK_MUX_GDM1_TO_GMAC1_ESW | \ + MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII) + +#define MT7623_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | MTK_GMAC2_RGMII) + +#define MT7629_CAPS (MTK_GMAC1_SGMII | MTK_GMAC2_SGMII | MTK_GMAC2_GEPHY | \ + MTK_GDM1_ESW | MTK_MUX_GDM1_TO_GMAC1_ESW | \ + MTK_MUX_GMAC2_GMAC0_TO_GEPHY | \ + MTK_MUX_U3_GMAC2_TO_QPHY | \ + MTK_MUX_GMAC12_TO_GEPHY_SGMII) + /* struct mtk_soc_data - This is the structure holding all differences * among various platforms + * @ana_rgc3: The offset for register ANA_RGC3 related to + * sgmiisys syscon * @caps Flags showing the extra capabilities for the SoC * @required_clks Flags showing the bitmap for required clocks on * the target SoC @@ -576,6 +713,7 @@ struct mtk_rx_ring { * the extra setup for those pins used by GMAC.
*/ struct mtk_soc_data { + u32 ana_rgc3; u32 caps; u32 required_clks; bool required_pctl; @@ -584,6 +722,26 @@ struct mtk_soc_data { /* currently no SoC has more than 2 macs */ #define MTK_MAX_DEVS 2 +#define MTK_SGMII_PHYSPEED_AN BIT(31) +#define MTK_SGMII_PHYSPEED_MASK GENMASK(2, 0) +#define MTK_SGMII_PHYSPEED_1000 BIT(0) +#define MTK_SGMII_PHYSPEED_2500 BIT(1) +#define MTK_HAS_FLAGS(flags, _x) (((flags) & (_x)) == (_x)) + +/* struct mtk_sgmii - This is the structure holding sgmii regmap and its + * characteristics + * @regmap: The register map pointing at the range used to setup + * SGMII modes + * @flags: The flags indicating which speed mode the SGMII should run in + * @ana_rgc3: The offset refers to register ANA_RGC3 related to regmap + */ + +struct mtk_sgmii { + struct regmap *regmap[MTK_MAX_DEVS]; + u32 flags[MTK_MAX_DEVS]; + u32 ana_rgc3; +}; + /* struct mtk_eth - This is the main data structure for holding the state * of the driver * @dev: The device pointer @@ -599,8 +757,8 @@ struct mtk_soc_data { * @msg_enable: Ethtool msg level * @ethsys: The register map pointing at the range used to setup * MII modes - * @sgmiisys: The register map pointing at the range used to setup - * SGMII modes + * @infra: The register map pointing at the range used to setup + * SGMII and GePHY path * @pctl: The register map pointing at the range used to setup * GMAC port drive/slew values * @dma_refcnt: track how many netdevs are using the DMA engine @@ -632,7 +790,8 @@ struct mtk_eth { u32 msg_enable; unsigned long sysclk; struct regmap *ethsys; - struct regmap *sgmiisys; + struct regmap *infra; + struct mtk_sgmii *sgmii; struct regmap *pctl; bool hwlro; refcount_t dma_refcnt; @@ -683,4 +842,10 @@ void mtk_stats_update_mac(struct mtk_mac *mac); void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg); u32 mtk_r32(struct mtk_eth *eth, unsigned reg); +int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *np, + u32 ana_rgc3); +int mtk_sgmii_setup_mode_an(struct mtk_sgmii *ss, int id); +int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id); +int mtk_setup_hw_path(struct mtk_eth *eth, int mac_id, int phymode); + #endif /* MTK_ETH_H */ diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c new file mode 100644 index 000000000000..136f90ce5a65 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018-2019 MediaTek Inc.
+ +/* A library for MediaTek SGMII circuit + * + * Author: Sean Wang <sean.wang@mediatek.com> + * + */ + +#include <linux/mfd/syscon.h> +#include <linux/of.h> +#include <linux/regmap.h> + +#include "mtk_eth_soc.h" + +int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3) +{ + struct device_node *np; + const char *str; + int i, err; + + ss->ana_rgc3 = ana_rgc3; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + np = of_parse_phandle(r, "mediatek,sgmiisys", i); + if (!np) + break; + + ss->regmap[i] = syscon_node_to_regmap(np); + if (IS_ERR(ss->regmap[i])) + return PTR_ERR(ss->regmap[i]); + + err = of_property_read_string(np, "mediatek,physpeed", &str); + if (err) + return err; + + if (!strcmp(str, "2500")) + ss->flags[i] |= MTK_SGMII_PHYSPEED_2500; + else if (!strcmp(str, "1000")) + ss->flags[i] |= MTK_SGMII_PHYSPEED_1000; + else if (!strcmp(str, "auto")) + ss->flags[i] |= MTK_SGMII_PHYSPEED_AN; + else + return -EINVAL; + } + + return 0; +} + +int mtk_sgmii_setup_mode_an(struct mtk_sgmii *ss, int id) +{ + unsigned int val; + + if (!ss->regmap[id]) + return -EINVAL; + + /* Setup the link timer and QPHY power up inside SGMIISYS */ + regmap_write(ss->regmap[id], SGMSYS_PCS_LINK_TIMER, + SGMII_LINK_TIMER_DEFAULT); + + regmap_read(ss->regmap[id], SGMSYS_SGMII_MODE, &val); + val |= SGMII_REMOTE_FAULT_DIS; + regmap_write(ss->regmap[id], SGMSYS_SGMII_MODE, val); + + regmap_read(ss->regmap[id], SGMSYS_PCS_CONTROL_1, &val); + val |= SGMII_AN_RESTART; + regmap_write(ss->regmap[id], SGMSYS_PCS_CONTROL_1, val); + + regmap_read(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, &val); + val &= ~SGMII_PHYA_PWD; + regmap_write(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, val); + + return 0; +} + +int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id) +{ + unsigned int val; + int mode; + + if (!ss->regmap[id]) + return -EINVAL; + + regmap_read(ss->regmap[id], ss->ana_rgc3, &val); + val &= ~GENMASK(3, 2); + mode = ss->flags[id] & MTK_SGMII_PHYSPEED_MASK; + val |= (mode == MTK_SGMII_PHYSPEED_1000) ? 0 : BIT(2); + regmap_write(ss->regmap[id], ss->ana_rgc3, val); + + /* Disable SGMII AN */ + regmap_read(ss->regmap[id], SGMSYS_PCS_CONTROL_1, &val); + val &= ~BIT(12); + regmap_write(ss->regmap[id], SGMSYS_PCS_CONTROL_1, val); + + /* SGMII force mode setting */ + val = 0x31120019; + regmap_write(ss->regmap[id], SGMSYS_SGMII_MODE, val); + + /* Release PHYA power down state */ + regmap_read(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, &val); + val &= ~SGMII_PHYA_PWD; + regmap_write(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, val); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 2391e3cfb56b..37fef8cd25e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,7 @@ config MLX5_CORE_EN depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL + select DIMLIB default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. @@ -96,26 +97,60 @@ config MLX5_CORE_IPOIB ---help--- MLX5 IPoIB offloads & acceleration support. +config MLX5_FPGA_IPSEC + bool "Mellanox Technologies IPsec Innova support" + depends on MLX5_CORE + depends on MLX5_FPGA + default n + help + Build IPsec support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board.
If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + config MLX5_EN_IPSEC bool "IPSec XFRM cryptography-offload acceleration" - depends on MLX5_ACCEL depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + depends on MLX5_FPGA_IPSEC default n - ---help--- + help Build support for IPsec cryptography-offload acceleration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. -config MLX5_EN_TLS - bool "TLS cryptography-offload accelaration" +config MLX5_FPGA_TLS + bool "Mellanox Technologies TLS Innova support" + depends on TLS_DEVICE + depends on TLS=y || MLX5_CORE=m + depends on MLX5_FPGA + default n + help + Build TLS support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + +config MLX5_TLS + bool "Mellanox Technologies TLS Connect-X support" depends on MLX5_CORE_EN depends on TLS_DEVICE depends on TLS=y || MLX5_CORE=m - depends on MLX5_ACCEL + select MLX5_ACCEL default n - ---help--- - Build support for TLS cryptography-offload accelaration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. + help + Build TLS support for the Connect-X family of network cards by Mellanox + Technologies. + +config MLX5_EN_TLS + bool "TLS cryptography-offload acceleration" + depends on MLX5_CORE_EN + depends on MLX5_FPGA_TLS || MLX5_TLS + default y + help + Build support for TLS cryptography-offload acceleration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available.
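A minimal usage sketch of the TLS Kconfig split above (not part of the patch; the function name example_tls_tx_possible is invented for illustration). It uses only the accel/tls.h helpers shown in the hunks below: mlx5_accel_is_tls_device() covers both the Innova (FPGA) flavor and the ConnectX kTLS flavor, and when both MLX5_FPGA_TLS and MLX5_TLS are disabled the inline stubs make every check compile down to false.

#include "accel/tls.h"

/* Sketch: decide whether TLS TX offload can be attempted on this device. */
static bool example_tls_tx_possible(struct mlx5_core_dev *mdev,
				    struct tls_crypto_info *crypto_info)
{
	/* True for Innova (FPGA) TLS or kTLS-capable ConnectX devices */
	if (!mlx5_accel_is_tls_device(mdev))
		return false;

	/* kTLS additionally restricts the cipher and TLS version */
	if (mlx5_accel_is_ktls_device(mdev))
		return mlx5e_ktls_type_check(mdev, crypto_info);

	return true;
}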
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 243368dc23db..57d2cc666fe3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,9 +13,10 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o + lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ + diag/fw_tracer.o diag/crdump.o devlink.o # # Netdev basic @@ -23,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \ - en/params.o + en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o # # Netdev extra @@ -31,12 +32,15 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ + lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + en/tc_tun_geneve.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o rdma.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ + ecpf.o rdma.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o @@ -49,12 +53,14 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # # Accelerations & FPGA # -mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o +mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o +mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o -mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ - fpga/ipsec.o fpga/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o -mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ + en_accel/ktls.o en_accel/ktls_tx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c index 9f1b1939716a..eddc34e4a762 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -31,6 +31,8 @@ * */ +#ifdef CONFIG_MLX5_FPGA_IPSEC + #include <linux/mlx5/device.h> #include "accel/ipsec.h" @@ -74,6 +76,11 @@ int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return mlx5_fpga_ipsec_init(mdev); } +void mlx5_accel_ipsec_build_fs_cmds(void) +{ + mlx5_fpga_ipsec_build_fs_cmds(); +} + void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { 
mlx5_fpga_ipsec_cleanup(mdev); @@ -107,3 +114,5 @@ int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, return mlx5_fpga_esp_modify_xfrm(xfrm, attrs); } EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h index 024dbd22a89b..530e428d46ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -37,7 +37,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/accel.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_FPGA_IPSEC #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ MLX5_ACCEL_IPSEC_CAP_DEVICE) @@ -54,6 +54,7 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, void mlx5_accel_esp_free_hw_context(void *context); int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_accel_ipsec_build_fs_cmds(void); void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); #else @@ -79,6 +80,10 @@ static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return 0; } +static inline void mlx5_accel_ipsec_build_fs_cmds(void) +{ +} + static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c index da7bd26368f9..cab708af3422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -35,6 +35,9 @@ #include "accel/tls.h" #include "mlx5_core.h" +#include "lib/mlx5.h" + +#ifdef CONFIG_MLX5_FPGA_TLS #include "fpga/tls.h" int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, @@ -61,7 +64,8 @@ int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { - return mlx5_fpga_is_tls_device(mdev); + return mlx5_fpga_is_tls_device(mdev) || + mlx5_accel_is_ktls_device(mdev); } u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) @@ -78,3 +82,42 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { mlx5_fpga_tls_cleanup(mdev); } +#endif + +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) +{ + u32 sz_bytes; + void *key; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return -EINVAL; + } + + return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id); +} + +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_destroy_encryption_key(mdev, key_id); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index def4093ebfae..879321b21616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -37,8 +37,51 @@ #include <linux/mlx5/driver.h> #include <linux/tls.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 
*p_key_id); +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, tls)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + } + + return false; +} +#else +static inline int +mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) { return -ENOTSUPP; } +static inline void +mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {} + +static inline bool +mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool +mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) { return false; } +#endif + +#ifdef CONFIG_MLX5_FPGA_TLS enum { MLX5_ACCEL_TLS_TX = BIT(0), MLX5_ACCEL_TLS_RX = BIT(1), @@ -84,11 +127,13 @@ static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, bool direction_sx) { } static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, u64 rcd_sn) { return 0; } -static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) +{ + return mlx5_accel_is_ktls_device(mdev); +} static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } - #endif #endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e94686c42000..8cdd7e66f8df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: - case MLX5_CMD_OP_QUERY_HOST_PARAMS: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -632,7 +632,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); - MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); MLX5_COMMAND_STR_CASE(CREATE_UCTX); MLX5_COMMAND_STR_CASE(DESTROY_UCTX); MLX5_COMMAND_STR_CASE(CREATE_UMEM); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 713a17ee3751..818edc63e428 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data) list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); - mcq->tasklet_ctx.comp(mcq); + mcq->tasklet_ctx.comp(mcq, NULL); mlx5_cq_put(mcq); if (time_after(jiffies, end)) break; @@ -68,7 +68,8 @@ void mlx5_cq_tasklet_cb(unsigned long data) tasklet_schedule(&ctx->task); } 
-static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) { unsigned long flags; struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; @@ -87,11 +88,10 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) } int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *in, int inlen) + u32 *in, int inlen, u32 *out, int outlen) { int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; - u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; struct mlx5_eq_comp *eq; int err; @@ -100,9 +100,9 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, if (IS_ERR(eq)) return PTR_ERR(eq); - memset(out, 0, sizeof(out)); + memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) return err; @@ -158,13 +158,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); - if (err) - return err; - - err = mlx5_eq_del_cq(&cq->eq->core, cq); - if (err) - return err; + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); + mlx5_eq_del_cq(&cq->eq->core, cq); MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index f6b1da99e6c2..5bb6a26ea267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -311,13 +311,20 @@ static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev) /* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { - u32 pci_id = mlx5_gen_pci_id(dev); struct mlx5_core_dev *res = NULL; struct mlx5_core_dev *tmp_dev; struct mlx5_priv *priv; + u32 pci_id; + if (!mlx5_core_is_pf(dev)) + return NULL; + + pci_id = mlx5_gen_pci_id(dev); list_for_each_entry(priv, &mlx5_dev_list, dev_list) { tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if (!mlx5_core_is_pf(tmp_dev)) + continue; + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { res = tmp_dev; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c new file mode 100644 index 000000000000..a400f4430c28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <devlink.h> + +#include "mlx5_core.h" +#include "eswitch.h" + +static int mlx5_devlink_flash_update(struct devlink *devlink, + const char *file_name, + const char *component, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + const struct firmware *fw; + int err; + + if (component) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, file_name, &dev->pdev->dev); + if (err) + return err; + + return mlx5_firmware_flash(dev, fw, extack); +} + +static u8 mlx5_fw_ver_major(u32 version) +{ + return (version >> 24) & 0xff; +} + +static u8 mlx5_fw_ver_minor(u32 version) +{ + return (version >> 16) & 0xff; +} + +static u16 mlx5_fw_ver_subminor(u32 version) +{ + return version & 
0xffff; +} + +#define DEVLINK_FW_STRING_LEN 32 + +static int +mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char version_str[DEVLINK_FW_STRING_LEN]; + u32 running_fw, stored_fw; + int err; + + err = devlink_info_driver_name_put(req, DRIVER_NAME); + if (err) + return err; + + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); + if (err) + return err; + + err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); + if (err) + return err; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw), + mlx5_fw_ver_subminor(running_fw)); + err = devlink_info_version_running_put(req, "fw.version", version_str); + if (err) + return err; + + /* no pending version, return running (stored) version */ + if (stored_fw == 0) + stored_fw = running_fw; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw), + mlx5_fw_ver_subminor(stored_fw)); + err = devlink_info_version_stored_put(req, "fw.version", version_str); + if (err) + return err; + + return 0; +} + +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_ESWITCH + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, + .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, +#endif + .flash_update = mlx5_devlink_flash_update, + .info_get = mlx5_devlink_info_get, +}; + +struct devlink *mlx5_devlink_alloc(void) +{ + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); +} + +void mlx5_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +{ + return devlink_register(devlink, dev); +} + +void mlx5_devlink_unregister(struct devlink *devlink) +{ + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h new file mode 100644 index 000000000000..d0ba03774ddf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef __MLX5_DEVLINK_H__ +#define __MLX5_DEVLINK_H__ + +#include <net/devlink.h> + +struct devlink *mlx5_devlink_alloc(void); +void mlx5_devlink_free(struct devlink *devlink); +int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +void mlx5_devlink_unregister(struct devlink *devlink); + +#endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c new file mode 100644 index 000000000000..28d02749d3c4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/pci_vsc.h" +#include "lib/mlx5.h" + +#define BAD_ACCESS 0xBADACCE5 +#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7 + +static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev) +{ + return 
!!dev->priv.health.crdump_size; +} + +static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data) +{ + u32 crdump_size = dev->priv.health.crdump_size; + int i, ret; + + for (i = 0; i < (crdump_size / 4); i++) + cr_data[i] = BAD_ACCESS; + + ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size); + if (ret <= 0) { + if (ret == 0) + return -EIO; + return ret; + } + + if (crdump_size != ret) { + mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n", + ret, crdump_size); + return -EINVAL; + } + + return 0; +} + +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) +{ + int ret; + + if (!mlx5_crdump_enabled(dev)) + return -ENODEV; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) { + mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n", + ret); + return ret; + } + /* Verify no other PF is running cr-dump or sw reset */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, + MLX5_VSC_LOCK); + if (ret) { + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + goto unlock_gw; + } + + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); + if (ret) + goto unlock_sem; + + ret = mlx5_crdump_fill(dev, cr_data); + +unlock_sem: + mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK); +unlock_gw: + mlx5_vsc_gw_unlock(dev); + return ret; +} + +int mlx5_crdump_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + u32 space_size; + int ret; + + if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) || + mlx5_crdump_enabled(dev)) + return 0; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) + return ret; + + /* Check if space is supported and get space size */ + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, + &space_size); + if (ret) { + /* Unlock and mask error since space is not supported */ + mlx5_vsc_gw_unlock(dev); + return 0; + } + + if (!space_size) { + mlx5_core_warn(dev, "Invalid Crspace size, zero\n"); + mlx5_vsc_gw_unlock(dev); + return -EINVAL; + } + + ret = mlx5_vsc_gw_unlock(dev); + if (ret) + return ret; + + priv->health.crdump_size = space_size; + return 0; +} + +void mlx5_crdump_disable(struct mlx5_core_dev *dev) +{ + dev->priv.health.crdump_size = 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index a4cf123e3f17..ddf1b87f1bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, index) __field(u32, action) __field(u32, flow_tag) + __field(u32, flow_source) __field(u8, mask_enable) __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) @@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->index = fte->index; __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->action.flow_tag; + __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 6999f4486e9e..8a4930c8bf62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -243,6 +243,19 @@ free_strings_db: return -ENOMEM; } +static 
void +mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + tracer->st_arr.saved_traces_index = 0; + mutex_init(&tracer->st_arr.lock); +} + +static void +mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + mutex_destroy(&tracer->st_arr.lock); +} + static void mlx5_tracer_read_strings_db(struct work_struct *work) { struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer, @@ -522,6 +535,24 @@ static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer) list_del(&str_frmt->list); } +static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, + u64 timestamp, bool lost, + u8 event_id, char *msg) +{ + struct mlx5_fw_trace_data *trace_data; + + mutex_lock(&tracer->st_arr.lock); + trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index]; + trace_data->timestamp = timestamp; + trace_data->lost = lost; + trace_data->event_id = event_id; + strncpy(trace_data->msg, msg, TRACE_STR_MSG); + + tracer->st_arr.saved_traces_index = + (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1); + mutex_unlock(&tracer->st_arr.lock); +} + static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, struct mlx5_core_dev *dev, u64 trace_timestamp) @@ -540,6 +571,9 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, str_frmt->event_id, tmp); + mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp, + str_frmt->lost, str_frmt->event_id, tmp); + /* remove it from hash */ mlx5_tracer_clean_message(str_frmt); } @@ -786,6 +820,109 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_fw_tracer_start(tracer); } +static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev, + u32 *in, int size_in) +{ + u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) && + !MLX5_CAP_DEBUG(dev, core_dump_qp)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out), + MLX5_REG_CORE_DUMP, 0, 1); +} + +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = dev->tracer; + u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + int err; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer) + return -EOPNOTSUPP; + if (!tracer->owner) + return -EPERM; + + MLX5_SET(core_dump_reg, in, core_dump_type, 0x0); + + err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in)); + if (err) + return err; + queue_work(tracer->work_queue, &tracer->handle_traces_work); + flush_workqueue(tracer->work_queue); + return 0; +} + +static int +mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg, + struct mlx5_fw_trace_data *trace_data) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return 0; +} + +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg) +{ + struct mlx5_fw_trace_data *straces = tracer->st_arr.straces; + u32 index, start_index, end_index; + u32 
saved_traces_index; + int err; + + if (!straces[0].timestamp) + return -ENOMSG; + + mutex_lock(&tracer->st_arr.lock); + saved_traces_index = tracer->st_arr.saved_traces_index; + if (straces[saved_traces_index].timestamp) + start_index = saved_traces_index; + else + start_index = 0; + end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1); + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces"); + if (err) + goto unlock; + index = start_index; + while (index != end_index) { + err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]); + if (err) + goto unlock; + + index = (index + 1) & (SAVED_TRACES_NUM - 1); + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&tracer->st_arr.lock); + return err; +} + /* Create software resources (Buffers, etc ..) */ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) { @@ -833,6 +970,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) goto free_log_buf; } + mlx5_fw_tracer_init_saved_traces_array(tracer); mlx5_core_dbg(dev, "FWTracer: Tracer created\n"); return tracer; @@ -917,6 +1055,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) cancel_work_sync(&tracer->read_fw_strings_work); mlx5_fw_tracer_clean_ready_list(tracer); mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); mlx5_fw_tracer_free_strings_db(tracer); mlx5_fw_tracer_destroy_log_buf(tracer); flush_workqueue(tracer->work_queue); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index a8b8747f2b61..40601fba80ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -46,6 +46,9 @@ #define TRACER_BLOCK_SIZE_BYTE 256 #define TRACES_PER_BLOCK 32 +#define TRACE_STR_MSG 256 +#define SAVED_TRACES_NUM 8192 + #define TRACER_MAX_PARAMS 7 #define MESSAGE_HASH_BITS 6 #define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS) @@ -53,6 +56,13 @@ #define MASK_52_7 (0x1FFFFFFFFFFF80) #define MASK_6_0 (0x7F) +struct mlx5_fw_trace_data { + u64 timestamp; + bool lost; + u8 event_id; + char msg[TRACE_STR_MSG]; +}; + struct mlx5_fw_tracer { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -83,6 +93,13 @@ struct mlx5_fw_tracer { u32 consumer_index; } buff; + /* Saved Traces Array */ + struct { + struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM]; + u32 saved_traces_index; + struct mutex lock; /* Protect st_arr access */ + } st_arr; + u64 last_timestamp; struct work_struct handle_traces_work; struct hlist_head hash[MESSAGE_HASH_SIZE]; @@ -171,5 +188,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 0ccd6d40baf7..d2228e37450f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } - -static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, - u32 *out, int outlen) -{ - u32 
in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; - - MLX5_SET(query_host_params_in, in, opcode, - MLX5_CMD_OP_QUERY_HOST_PARAMS); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ - u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; - int err; - - err = mlx5_query_host_params_context(dev, out, sizeof(out)); - if (err) - return err; - - *num_vf = MLX5_GET(query_host_params_out, out, - host_params_context.host_num_of_vfs); - mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 346372df218f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,7 +16,6 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -24,9 +23,6 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} -static inline int -mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc6797e24571..263558875f20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -48,7 +48,7 @@ #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> #include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" @@ -137,6 +137,7 @@ struct page_pool; #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ #define MLX5E_UMR_WQE_INLINE_SZ \ @@ -155,6 +156,11 @@ do { \ ##__VA_ARGS__); \ } while (0) +enum mlx5e_rq_group { + MLX5E_RQ_GROUP_REGULAR, + MLX5E_RQ_GROUP_XSK, + MLX5E_NUM_RQ_GROUPS /* Keep last. 
*/ +}; static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { @@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) /* Use this function to get max num channels after netdev was created */ static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev) { - return min_t(unsigned int, netdev->num_rx_queues, + return min_t(unsigned int, + netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS, netdev->num_tx_queues); } @@ -202,7 +209,10 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; - struct mlx5_mtt inline_mtts[0]; + union { + struct mlx5_mtt inline_mtts[0]; + u8 tls_static_params_ctx[0]; + }; }; extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; @@ -238,9 +248,9 @@ struct mlx5e_params { u16 num_channels; u8 num_tc; bool rx_cqe_compress_def; - struct net_dim_cq_moder rx_cq_moderation; - struct net_dim_cq_moder tx_cq_moderation; bool tunneled_offload_en; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; bool lro_en; u8 tx_min_inline_mode; bool vlan_strip_disable; @@ -250,6 +260,7 @@ struct mlx5e_params { u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; + struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; }; @@ -325,6 +336,9 @@ struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; u8 num_dma; +#ifdef CONFIG_MLX5_EN_TLS + skb_frag_t *resync_dump_frag; +#endif }; enum mlx5e_dma_map_type { @@ -348,6 +362,13 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; + + /* Auxiliary data for different opcodes. */ + union { + struct { + struct mlx5e_rq *rq; + } umr; + }; }; struct mlx5e_txqsq { @@ -356,7 +377,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; - struct net_dim dim; /* Adaptive Moderation */ + struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -375,6 +396,7 @@ struct mlx5e_txqsq { void __iomem *uar_map; struct netdev_queue *txq; u32 sqn; + u16 stop_room; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; @@ -392,14 +414,55 @@ struct mlx5e_txqsq { } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { - struct page *page; - dma_addr_t addr; + dma_addr_t addr; + union { + struct page *page; + struct { + u64 handle; + void *data; + } xsk; + }; +}; + +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. 
+ */ + MLX5E_XDP_XMIT_MODE_XSK, }; struct mlx5e_xdp_info { - struct xdp_frame *xdpf; - dma_addr_t dma_addr; - struct mlx5e_dma_info di; + enum mlx5e_xdp_xmit_mode mode; + union { + struct { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + struct { + struct mlx5e_rq *rq; + struct mlx5e_dma_info di; + } page; + }; +}; + +struct mlx5e_xdp_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len; }; struct mlx5e_xdp_info_fifo { @@ -425,8 +488,12 @@ struct mlx5e_xdp_mpwqe { }; struct mlx5e_xdpsq; -typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, - struct mlx5e_xdp_info*); +typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, + struct mlx5e_xdp_xmit_data *, + struct mlx5e_xdp_info *, + int); + struct mlx5e_xdpsq { /* data path */ @@ -443,8 +510,10 @@ struct mlx5e_xdpsq { struct mlx5e_cq cq; /* read only */ + struct xdp_umem *umem; struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { struct mlx5e_xdp_wqe_info *wqe_info; @@ -487,12 +556,6 @@ struct mlx5e_icosq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; -static inline bool -mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) -{ - return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); -} - struct mlx5e_wqe_frag_info { struct mlx5e_dma_info *di; u32 offset; @@ -571,9 +634,11 @@ struct mlx5e_rq { u8 log_stride_sz; u8 umr_in_progress; u8 umr_last_bulk; + u8 umr_completed; } mpwqe; }; struct { + u16 umem_headroom; u16 headroom; u8 map_dir; /* dma map direction */ } buff; @@ -596,14 +661,18 @@ struct mlx5e_rq { int ix; unsigned int hw_mtu; - struct net_dim dim; /* Dynamic Interrupt Moderation */ + struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ struct bpf_prog *xdp_prog; - struct mlx5e_xdpsq xdpsq; + struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); struct page_pool *page_pool; + /* AF_XDP zero-copy */ + struct zero_copy_allocator zca; + struct xdp_umem *umem; + /* control */ struct mlx5_wq_ctrl wq_ctrl; __be32 mkey_be; @@ -616,9 +685,15 @@ struct mlx5e_rq { struct xdp_rxq_info xdp_rxq; } ____cacheline_aligned_in_smp; +enum mlx5e_channel_state { + MLX5E_CHANNEL_STATE_XSK, + MLX5E_CHANNEL_NUM_STATES +}; + struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ bool xdp; @@ -631,6 +706,13 @@ struct mlx5e_channel { /* XDP_REDIRECT */ struct mlx5e_xdpsq xdpsq; + /* AF_XDP zero-copy */ + struct mlx5e_rq xskrq; + struct mlx5e_xdpsq xsksq; + struct mlx5e_icosq xskicosq; + /* xskicosq can be accessed from any CPU - the spinlock protects it. 
*/ + spinlock_t xskicosq_lock; + /* data path - accessed per napi poll */ struct irq_desc *irq_desc; struct mlx5e_ch_stats *stats; @@ -639,6 +721,7 @@ struct mlx5e_channel { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; cpumask_var_t xps_cpumask; @@ -654,14 +737,17 @@ struct mlx5e_channel_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_rq_stats rq; + struct mlx5e_rq_stats xskrq; struct mlx5e_xdpsq_stats rq_xdpsq; struct mlx5e_xdpsq_stats xdpsq; + struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp; enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_OPEN, }; struct mlx5e_rqt { @@ -694,6 +780,17 @@ struct mlx5e_modify_sq_param { int rl_index; }; +struct mlx5e_xsk { + /* UMEMs are stored separately from channels, because we don't want to + * lose them when channels are recreated. The kernel also stores UMEMs, + * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs, + * so rely on our mechanism. + */ + struct xdp_umem **umems; + u16 refcnt; + bool ever_used; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -714,6 +811,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; @@ -750,6 +848,7 @@ struct mlx5e_priv { struct mlx5e_tls *tls; #endif struct devlink_health_reporter *tx_reporter; + struct mlx5e_xsk xsk; }; struct mlx5e_profile { @@ -763,6 +862,7 @@ struct mlx5e_profile { void (*cleanup_tx)(struct mlx5e_priv *priv); void (*enable)(struct mlx5e_priv *priv); void (*disable)(struct mlx5e_priv *priv); + int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); struct { @@ -781,7 +881,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more); void mlx5e_trigger_irq(struct mlx5e_icosq *sq); -void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); @@ -793,11 +893,13 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq); bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); @@ -853,6 +955,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, void 
mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); +struct mlx5e_xsk_param; + +struct mlx5e_rq_param; +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq); +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); +void mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_close_rq(struct mlx5e_rq *rq); + +struct mlx5e_sq_param; +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); +void mlx5e_close_icosq(struct mlx5e_icosq *sq); +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect); +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); + +struct mlx5e_cq_param; +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq); +void mlx5e_close_cq(struct mlx5e_cq *cq); + int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -898,102 +1024,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso); } -struct mlx5e_swp_spec { - __be16 l3_proto; - u8 l4_proto; - u8 is_tun; - __be16 tun_l3_proto; - u8 tun_l4_proto; -}; - -static inline void -mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, - struct mlx5e_swp_spec *swp_spec) -{ - /* SWP offsets are in 2-bytes words */ - eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; - if (swp_spec->l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; - if (swp_spec->l4_proto) { - eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; - if (swp_spec->l4_proto == IPPROTO_UDP) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; - } - - if (swp_spec->is_tun) { - eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; - if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ - eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; - if (swp_spec->l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - } - switch (swp_spec->tun_l4_proto) { - case IPPROTO_UDP: - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; - /* fall through */ - case IPPROTO_TCP: - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; - break; - } -} - -static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, - struct mlx5e_tx_wqe **wqe, - u16 *pi) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - - *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); - memset(*wqe, 0, sizeof(**wqe)); -} - -static inline -struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) -{ - u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - - memset(cseg, 0, sizeof(*cseg)); - - cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); - cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); - - (*pc)++; - - return wqe; -} - -static inline -void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, - void __iomem *uar_map, - struct mlx5_wqe_ctrl_seg 
*ctrl) -{ - ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - /* ensure wqe is visible to device before updating doorbell record */ - dma_wmb(); - - *wq->db = cpu_to_be32(pc); - - /* ensure doorbell record is visible to device before ringing the - * doorbell - */ - wmb(); - - mlx5_write64((__be32 *)ctrl, uar_map); -} - -static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) -{ - struct mlx5_core_cq *mcq; - - mcq = &cq->mcq; - mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; @@ -1023,17 +1053,17 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn); +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); +int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); @@ -1075,8 +1105,6 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, @@ -1097,6 +1125,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index d3744bffbae3..79301d116667 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -3,65 +3,102 @@ #include "en/params.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params) +static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - u16 linear_rq_headroom = params->xdp_prog ? 
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - u32 frag_sz; + return params->xdp_prog || xsk; +} + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 headroom = NET_IP_ALIGN; + + if (mlx5e_rx_is_xdp(params, xsk)) { + headroom += XDP_PACKET_HEADROOM; + if (xsk) + headroom += xsk->headroom; + } else { + headroom += MLX5_RX_HEADROOM; + } + + return headroom; +} + +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + u32 frag_sz = linear_rq_headroom + hw_mtu; - linear_rq_headroom += NET_IP_ALIGN; + /* AF_XDP doesn't build SKBs in place. */ + if (!xsk) + frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); - frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu); + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (mlx5e_rx_is_xdp(params, xsk)) + frag_sz = max_t(u32, frag_sz, PAGE_SIZE); - if (params->xdp_prog && frag_sz < PAGE_SIZE) - frag_sz = PAGE_SIZE; + /* Even if we can go with a smaller fragment size, we must not put + * multiple packets into a single frame. + */ + if (xsk) + frag_sz = max_t(u32, frag_sz, xsk->chunk_size); return frag_sz; } -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params) +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); } -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params) +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more + * than one page. For this, check both with and without xsk. 
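+	 *
+	 * E.g. with 4K pages and a 1500 byte MTU (illustrative numbers): the
+	 * xsk branch yields a full page per frame, since XDP never shares a
+	 * page between packets, while the !xsk branch adds skb_shared_info
+	 * overhead for the copy made on XDP_PASS; both results must stay
+	 * within PAGE_SIZE for the linear path to be chosen.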
+ */ + u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), + mlx5e_rx_get_linear_frag_sz(params, NULL)); - return !params->lro_en && frag_sz <= PAGE_SIZE; + return !params->lro_en && linear_frag_sz <= PAGE_SIZE; } #define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ MLX5_MPWQE_LOG_STRIDE_SZ_BASE) bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); s8 signed_log_num_strides_param; u8 log_num_strides; - if (!mlx5e_rx_is_linear_skb(params)) + if (!mlx5e_rx_is_linear_skb(params, xsk)) return false; - if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) return false; if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) return true; - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz); + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); signed_log_num_strides_param = (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; return signed_log_num_strides_param >= 0; } -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params); + u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk); /* Numbers are unsigned, don't subtract to avoid underflow. */ if (params->log_rq_mtu_frames < @@ -72,33 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) } u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) - return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { return MLX5_MPWRQ_LOG_WQE_SZ - - mlx5e_mpwqe_get_log_stride_size(mdev, params); + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 linear_rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - bool is_linear_skb; - - linear_rq_headroom += NET_IP_ALIGN; - - is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params); + bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? + mlx5e_rx_is_linear_skb(params, xsk) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); - return is_linear_skb ? linear_rq_headroom : 0; + return is_linear_skb ? 
mlx5e_get_linear_rq_headroom(params, xsk) : 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index b106a0236f36..bd882b5ee9a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -6,17 +6,119 @@ #include "en.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params); -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params); -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params); +struct mlx5e_xsk_param { + u16 headroom; + u16 chunk_size; +}; + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + bool is_mpw; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; +}; + +static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params, + u16 qid, + enum mlx5e_rq_group group, + u16 *ix) +{ + int nch = params->num_channels; + int ch = qid - nch * group; + + if (ch < 0 || ch >= nch) + return false; + + *ix = ch; + return true; +} + +static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params, + u16 qid, + u16 *ix, + enum mlx5e_rq_group *group) +{ + u16 nch = params->num_channels; + + *ix = qid % nch; + *group = qid / nch; +} + +static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid) +{ + return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS; +} + +/* Parameter calculations */ + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +/* Build queue parameters */ + +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param); +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param); +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct 
mlx5e_cq_param *param); +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param); +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param); +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param); #endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 231e7cdfc6f7..a6a52806be45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -3,8 +3,22 @@ #include <net/vxlan.h> #include <net/gre.h> -#include "lib/vxlan.h" +#include <net/geneve.h> #include "en/tc_tun.h" +#include "en_tc.h" + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return &vxlan_tunnel; + else if (netif_is_geneve(tunnel_dev)) + return &geneve_tunnel; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return &gre_tunnel; + else + return NULL; +} static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device *dev, @@ -34,7 +48,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, *route_dev = dev; if (is_vlan_dev(*route_dev)) *out_dev = uplink_dev; - else if (mlx5e_eswitch_rep(dev)) + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) *out_dev = *route_dev; else return -EOPNOTSUPP; @@ -142,63 +157,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - struct udphdr *udp = (struct udphdr *)(buf); - struct vxlanhdr *vxh = (struct vxlanhdr *) - ((char *)udp + sizeof(struct udphdr)); - - udp->dest = tun_key->tp_dst; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(tun_id); - - return 0; -} - -static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - int hdr_len; - struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); - - /* the HW does not calculate GRE csum or sequences */ - if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) - return -EOPNOTSUPP; - - greh->protocol = htons(ETH_P_TEB); - - /* GRE key */ - hdr_len = gre_calc_hlen(tun_key->tun_flags); - greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); - if (tun_key->tun_flags & TUNNEL_KEY) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - *ptr = tun_id; - } - - return 0; -} - static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { - int err = 0; - struct ip_tunnel_key *key = &e->tun_info.key; - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *ip_proto = IPPROTO_UDP; - err = mlx5e_gen_vxlan_header(buf, key); - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *ip_proto = IPPROTO_GRE; - err = mlx5e_gen_gre_header(buf, key); - } else { - pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" - , e->tunnel_type); - err = -EOPNOTSUPP; + if (!e->tunnel) { + pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n"); + return -EOPNOTSUPP; } - return err; + return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); } static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, @@ -230,7 +197,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct 
mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; @@ -254,7 +221,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ipv4_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -346,7 +313,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; @@ -370,7 +337,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ipv6_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -456,27 +423,12 @@ out: return err; } -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) -{ - if (netif_is_vxlan(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_VXLAN; - else if (netif_is_gretap(tunnel_dev) || - netif_is_ip6gretap(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_GRETAP; - else - return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; -} - bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) { - int tunnel_type = mlx5e_tc_tun_get_type(netdev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - return true; - else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && - MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + if (tunnel && tunnel->can_offload(priv)) return true; else return false; @@ -487,71 +439,87 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_encap_entry *e, struct netlink_ext_ack *extack) { - e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev); - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); - - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { - NL_SET_ERR_MSG_MOD(extack, - "vxlan udp dport was not registered with the HW"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", - dst_port); - return -EOPNOTSUPP; - } - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - e->tunnel_hlen = VXLAN_HLEN; - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; - e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); - } else { + if (!tunnel) { e->reformat_type = -1; - e->tunnel_hlen = -1; return -EOPNOTSUPP; } - return 0; + + return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); } -static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v) +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct 
flow_cls_offload *f, + void *headers_c, + void *headers_v, u8 *match_level) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + int err = 0; + + if (!tunnel) { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device\n", + mlx5e_netdev_kind(filter_dev)); + err = -EOPNOTSUPP; + goto out; + } + + *match_level = tunnel->match_level; + + if (tunnel->parse_udp_ports) { + err = tunnel->parse_udp_ports(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (tunnel->parse_tunnel) { + err = tunnel->parse_tunnel(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + +out: + return err; +} + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - void *misc_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, - spec->match_value, - misc_parameters); struct flow_match_ports enc_ports; - flow_rule_match_enc_ports(rule, &enc_ports); - /* Full udp dst port must be given */ - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { NL_SET_ERR_MSG_MOD(extack, - "VXLAN decap filter must include enc_dst_port condition"); + "UDP tunnel decap filter must include enc_dst_port condition"); netdev_warn(priv->netdev, - "VXLAN decap filter must include enc_dst_port condition\n"); + "UDP tunnel decap filter must include enc_dst_port condition\n"); return -EOPNOTSUPP; } - /* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { + flow_rule_match_enc_ports(rule, &enc_ports); + + if (memchr_inv(&enc_ports.mask->dst, 0xff, + sizeof(enc_ports.mask->dst))) { NL_SET_ERR_MSG_MOD(extack, - "Matched UDP port is not registered as a VXLAN port"); + "UDP tunnel decap filter must match enc_dst_port fully"); netdev_warn(priv->netdev, - "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(enc_ports.key->dst)); + "UDP tunnel decap filter must match enc_dst_port fully\n"); return -EOPNOTSUPP; } - /* dst UDP port is valid here */ + /* match on UDP protocol and dst port number */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); @@ -560,92 +528,15 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(enc_ports.key->dst)); + /* UDP src port on outer header is generated by HW, + * so it is probably a bad idea to request matching it. + * Nonetheless, it is allowed. 
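+	 *
+	 * E.g. (illustrative tc usage) a flower rule with "enc_dst_port 4789"
+	 * matches here as-is; adding "enc_src_port" is accepted too, but
+	 * since the outgoing source port is picked by the HW for entropy,
+	 * such a match rarely behaves the way the user expects.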
+ */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(enc_ports.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(enc_ports.key->src)); - /* match on VNI */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(enc_keyid.key->keyid)); - } - return 0; -} - -static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *outer_headers_c, - void *outer_headers_v) -{ - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - - if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { - NL_SET_ERR_MSG_MOD(f->common.extack, - "GRE HW offloading is not supported"); - netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); - return -EOPNOTSUPP; - } - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, - ip_protocol, IPPROTO_GRE); - - /* gre protocol*/ - MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); - MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); - - /* gre key */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - MLX5_SET(fte_match_set_misc, misc_c, - gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, - gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); - } - return 0; } - -int mlx5e_tc_tun_parse(struct net_device *filter_dev, - struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) -{ - int tunnel_type; - int err = 0; - - tunnel_type = mlx5e_tc_tun_get_type(filter_dev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *match_level = MLX5_MATCH_L4; - err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, - headers_c, headers_v); - } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *match_level = MLX5_MATCH_L3; - err = mlx5e_tc_tun_parse_gretap(priv, spec, f, - headers_c, headers_v); - } else { - netdev_warn(priv->netdev, - "decapsulation offload is not supported for %s (kind: \"%s\")\n", - netdev_name(filter_dev), - mlx5e_netdev_kind(filter_dev)); - - return -EOPNOTSUPP; - } - return err; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index b63f15de899d..c362b9225dc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -14,9 +14,41 @@ enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, MLX5E_TC_TUNNEL_TYPE_VXLAN, - MLX5E_TC_TUNNEL_TYPE_GRETAP + MLX5E_TC_TUNNEL_TYPE_GENEVE, + MLX5E_TC_TUNNEL_TYPE_GRETAP, }; +struct mlx5e_tc_tunnel { + int tunnel_type; + enum mlx5_flow_match_level match_level; + + bool (*can_offload)(struct mlx5e_priv *priv); + int (*calc_hlen)(struct mlx5e_encap_entry *e); + int (*init_encap_attr)(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + int (*generate_ip_tun_hdr)(char buf[], + __u8 *ip_proto, + 
struct mlx5e_encap_entry *e); + int (*parse_udp_ports)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + int (*parse_tunnel)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); +}; + +extern struct mlx5e_tc_tunnel vxlan_tunnel; +extern struct mlx5e_tc_tunnel geneve_tunnel; +extern struct mlx5e_tc_tunnel gre_tunnel; + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); + int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, @@ -30,15 +62,20 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, void *headers_c, void *headers_v, u8 *match_level); +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c new file mode 100644 index 000000000000..951ea26d96bc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/geneve.h> +#include "lib/geneve.h" +#include "en/tc_tun.h" + +#define MLX5E_GENEVE_VER 0 + +static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv) +{ + return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE); +} + +static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + + sizeof(struct genevehdr) + + e->tun_info->options_len; +} + +static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Currently we support only default GENEVE + * port, so udp dst port must match. 
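+	 *
+	 * E.g. a tunnel set up with "ip link add gnv0 type geneve id 42
+	 * remote 10.0.0.2" uses the default port 6081 and passes this check,
+	 * while one created with "dstport 6000" fails it and the rule is
+	 * handled in software instead.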
+ */ + if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a GENEVE port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a GENEVE port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_geneve(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &geneve_tunnel; + + /* Reformat type for GENEVE encap is similar to VXLAN: + * in both cases the HW adds in the same place a + * defined encapsulation header that the SW provides. + */ + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_info *tun_info = e->tun_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct genevehdr *geneveh; + + geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr)); + + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_info->key.tp_dst; + + memset(geneveh, 0, sizeof(*geneveh)); + geneveh->ver = MLX5E_GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); + geneveh->proto_type = htons(ETH_P_TEB); + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (!geneveh->opt_len) + return -EOPNOTSUPP; + ip_tunnel_info_opts_get(geneveh->options, tun_info); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +static int 
mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len); + u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c, *misc_v, *misc_3_c, *misc_3_v; + struct geneve_opt *option_key, *option_mask; + __be32 opt_data_key = 0, opt_data_mask = 0; + struct flow_match_enc_opts enc_opts; + int res = 0; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3); + misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + return 0; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options is not supported\n"); + return -EOPNOTSUPP; + } + + /* make sure that we're talking about GENEVE options */ + + if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: option type is not GENEVE"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: option type is not GENEVE\n"); + return -EOPNOTSUPP; + } + + if (enc_opts.mask->len && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_opt_len)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options len is not supported\n"); + return -EOPNOTSUPP; + } + + /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it + * doesn't include the TLV option header. 'geneve_opt_len' is a total + * len of all the options, including the headers, also multiples of 4 + * bytes. Len that comes from the dissector is in bytes. 
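+	 *
+	 * E.g. (illustrative caps): with max_geneve_tlv_option_data_len = 1
+	 * (one 4-byte data word) and max_geneve_tlv_options = 1, the largest
+	 * dissector len accepted below is (1 + 1) * 1 * 4 = 8 bytes - one
+	 * 4-byte TLV header plus 4 bytes of option data.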
+ */ + + if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported options len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported options len (len=%d)\n", + enc_opts.key->len); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4); + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4); + + /* we support matching on one option only, so just get it */ + option_key = (struct geneve_opt *)&enc_opts.key->data[0]; + option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + + if (option_key->length > max_tlv_option_data_len) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported option len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n", + option_key->length, option_mask->length); + return -EOPNOTSUPP; + } + + /* data can't be all 0 - fail to offload such rule */ + if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: can't match on 0 data field"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: can't match on 0 data field\n"); + return -EOPNOTSUPP; + } + + /* add new GENEVE TLV options object */ + res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key); + if (res) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: failed creating TLV opt object"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n", + be16_to_cpu(option_key->opt_class), + option_key->type, option_key->length); + return res; + } + + /* In general, after creating the object, need to query it + * in order to check which option data to set in misc3. + * But we support only geneve_tlv_option_0_data, so no + * point querying at this stage. + */ + + memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4); + memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4); + MLX5_SET(fte_match_set_misc3, misc_3_v, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct netlink_ext_ack *extack = f->common.extack; + + /* match on OAM - packets with OAM bit on should NOT be offloaded */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n"); + return -EOPNOTSUPP; + } + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam); + MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0); + + /* Match on GENEVE protocol. We support only Transparent Eth Bridge. 
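+	 * I.e. when the FW supports matching on this field, only frames whose
+	 * GENEVE protocol_type is ETH_P_TEB (0x6558, an inner Ethernet frame)
+	 * can hit this rule; e.g. a GENEVE packet carrying bare IPv4 (0x0800)
+	 * falls back to the software path.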
*/ + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_protocol_type)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type); + MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f); + if (err) + return err; + + err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f); + if (err) + return err; + + return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); +} + +struct mlx5e_tc_tunnel geneve_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_geneve, + .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, + .parse_tunnel = mlx5e_tc_tun_parse_geneve, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c new file mode 100644 index 000000000000..58b13192df23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/gre.h> +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e) +{ + return gre_calc_hlen(e->tun_info->key.tun_flags); +} + +static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &gre_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + + *ip_proto = IPPROTO_GRE; + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if 
(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + return 0; +} + +struct mlx5e_tc_tunnel gre_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP, + .match_level = MLX5_MATCH_L3, + .can_offload = mlx5e_tc_tun_can_offload_gretap, + .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, + .parse_udp_ports = NULL, + .parse_tunnel = mlx5e_tc_tun_parse_gretap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c new file mode 100644 index 000000000000..37b176801bcc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/vxlan.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e) +{ + return VXLAN_HLEN; +} + +static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* check the UDP destination port validity */ + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, + be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err = 0; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); + + e->tunnel = &vxlan_tunnel; + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh; + + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = 
IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + /* match on VNI is required */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_vxlan_vni)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on VXLAN VNI is not supported"); + netdev_warn(priv->netdev, + "Matching on VXLAN VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +struct mlx5e_tc_tunnel vxlan_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_vxlan, + .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, + .parse_tunnel = mlx5e_tc_tun_parse_vxlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h new file mode 100644 index 000000000000..ddfe19adb3d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TXRX_H___ +#define __MLX5_EN_TXRX_H___ + +#include "en.h" + +#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) + +#ifndef CONFIG_MLX5_EN_TLS +#define MLX5E_SQ_TLS_ROOM (0) +#else +/* TLS offload requires additional stop_room for: + * - a resync SKB. + * kTLS offload requires additional stop_room for: + * - static params WQE, + * - progress params WQE, and + * - resync DUMP per frag. 
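+ *
+ * The sum below is a worst case: one maximally sized resync SKB plus,
+ * for kTLS, the fixed static/progress WQEs and one DUMP WQE per
+ * possible fragment (MAX_SKB_FRAGS); reserving this many WQEBBs before
+ * stopping the SQ guarantees the next transmit always fits.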
+ */ +#define MLX5E_SQ_TLS_ROOM \ + (MLX5_SEND_WQE_MAX_WQEBBS + \ + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \ + MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS) +#endif + +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); +} + +static inline void * +mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + void *wqe; + + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(wqe, 0, size); + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + + (*pc)++; + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + (*pc)++; + + return wqe; +} + +static inline void +mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->skb = NULL; + wi->num_wqebbs = 1; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nop += nnops; +} + +static inline void +mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *wq->db = cpu_to_be32(pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, uar_map); +} + +static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe) +{ + return !!wqe->ctrl.tisn; +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); +} + +static inline struct mlx5e_sq_dma * +mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) +{ + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; +} + +static inline void +mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, + enum mlx5e_dma_map_type map_type) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + dma->addr = addr; + dma->size = size; + dma->type = map_type; +} + +static inline void +mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA 
type!\n"); + } +} + +/* SW parser related functions */ + +struct mlx5e_swp_spec { + __be16 l3_proto; + u8 l4_proto; + u8 is_tun; + __be16 tun_l3_proto; + u8 tun_l4_proto; +}; + +static inline void +mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, + struct mlx5e_swp_spec *swp_spec) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + if (swp_spec->l4_proto) { + eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; + if (swp_spec->l4_proto == IPPROTO_UDP) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + } + + if (swp_spec->is_tun) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } + switch (swp_spec->tun_l4_proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index eb8ef78e5626..b0b982cf69bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -31,11 +31,13 @@ */ #include <linux/bpf_trace.h> +#include <net/xdp_sock.h> #include "en/xdp.h" +#include "en/params.h" -int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + int hr = mlx5e_get_linear_rq_headroom(params, xsk); /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). * The condition checked in mlx5e_rx_is_linear_skb is: @@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params) } static inline bool -mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, - struct xdp_buff *xdp) +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, struct xdp_buff *xdp) { + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; - xdpi.xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpi.xdpf)) + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) return false; - xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf); - dma_sync_single_for_device(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, PCI_DMA_TODEVICE); - xdpi.di = *di; - return sq->xmit_xdp_frame(sq, &xdpi); + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + + if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) { + /* The xdp_buff was in the UMEM and was copied into a newly + * allocated page. The UMEM page was returned via the ZCA, and + * this new page has to be mapped at this point and has to be + * unmapped and returned via xdp_return_frame on completion. + */ + + /* Prevent double recycling of the UMEM page. Even in case this + * function returns false, the xdp_buff shouldn't be recycled, + * as it was already done in xdp_convert_zc_to_xdp_frame. 
+ */ + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + + dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + DMA_TO_DEVICE); + if (dma_mapping_error(sq->pdev, dma_addr)) { + xdp_return_frame(xdpf); + return false; + } + + xdptxd.dma_addr = dma_addr; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = dma_addr; + } else { + /* Driver assumes that convert_to_xdp_frame returns an xdp_frame + * that points to the same memory region as the original + * xdp_buff. It allows to map the memory only once and to use + * the DMA_BIDIRECTIONAL mode. + */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + + dma_addr = di->addr + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, + DMA_TO_DEVICE); + + xdptxd.dma_addr = dma_addr; + xdpi.page.rq = rq; + xdpi.page.di = *di; + } + + return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0); } /* returns true if packet was consumed by xdp */ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len) + void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; @@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; + if (xsk) + xdp.handle = di->xsk.handle; xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); + if (xsk) + xdp.handle += xdp.data - xdp.data_hard_start; switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; *len = xdp.data_end - xdp.data; return false; case XDP_TX: - if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp))) + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp))) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ return true; @@ -106,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); - mlx5e_page_dma_unmap(rq, di); + if (!xsk) + mlx5e_page_dma_unmap(rq, di); rq->stats->xdp_redirect++; return true; default: @@ -160,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) stats->mpwqe++; } -static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; @@ -183,32 +235,55 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) session->wqe = NULL; /* Close session */ } +enum { + MLX5E_XDP_CHECK_OK = 1, + MLX5E_XDP_CHECK_START_MPWQE = 2, +}; + +static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!sq->mpwqe.wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_START_MPWQE; + } + + return MLX5E_XDP_CHECK_OK; +} + static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_info *xdpi) + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; - struct xdp_frame *xdpf = xdpi->xdpf; - - if (unlikely(sq->hw_mtu < xdpf->len)) { + if (unlikely(xdptxd->len > sq->hw_mtu)) { stats->err++; return false; 
} - if (unlikely(!session->wqe)) { - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; - return false; - } + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq); + if (unlikely(check_result < 0)) + return false; + if (check_result == MLX5E_XDP_CHECK_START_MPWQE) { + /* Start the session when nothing can fail, so it's guaranteed + * that if there is an active session, it has at least one dseg, + * and it's safe to complete it at any time. + */ mlx5e_xdp_mpwqe_session_start(sq); } - mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats); + mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); if (unlikely(session->complete || session->ds_count == session->max_ds_count)) @@ -219,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, return true; } -static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_OK; +} + +static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -229,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg = wqe->data; - struct xdp_frame *xdpf = xdpi->xdpf; - dma_addr_t dma_addr = xdpi->dma_addr; - unsigned int dma_len = xdpf->len; + dma_addr_t dma_addr = xdptxd->dma_addr; + u32 dma_len = xdptxd->len; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -242,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * return false; } - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check(sq); + if (unlikely(check_result < 0)) return false; - } cseg->fm_ce_se = 0; /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; @@ -277,7 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_wqe_info *wi, - struct mlx5e_rq *rq, + u32 *xsk_frames, bool recycle) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; @@ -286,22 +373,32 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, for (i = 0; i < wi->num_pkts; i++) { struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); - if (rq) { - /* XDP_TX */ - mlx5e_page_release(rq, &xdpi.di, recycle); - } else { - /* XDP_REDIRECT */ - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.xdpf); + switch (xdpi.mode) { + case MLX5E_XDP_XMIT_MODE_FRAME: + /* XDP_TX from the XSK RQ and XDP_REDIRECT */ + dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, + xdpi.frame.xdpf->len, 
DMA_TO_DEVICE); + xdp_return_frame(xdpi.frame.xdpf); + break; + case MLX5E_XDP_XMIT_MODE_PAGE: + /* XDP_TX from the regular RQ */ + mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle); + break; + case MLX5E_XDP_XMIT_MODE_XSK: + /* AF_XDP send */ + (*xsk_frames)++; + break; + default: + WARN_ON_ONCE(true); } } } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; + u32 xsk_frames = 0; u16 sqcc; int i; @@ -343,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sqcc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, true); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true); } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); + sq->stats->cqes += i; mlx5_cqwq_update_db_record(&cq->wq); @@ -358,8 +458,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) { + u32 xsk_frames = 0; + while (sq->cc != sq->pc) { struct mlx5e_xdp_wqe_info *wi; u16 ci; @@ -369,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) sq->cc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, false); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false); } + + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); } int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -398,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; - xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) { + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, + xdptxd.len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) { xdp_return_frame_rx_napi(xdpf); drops++; continue; } - xdpi.xdpf = xdpf; + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = xdptxd.dma_addr; - if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpf->len, DMA_TO_DEVICE); + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) { + dma_unmap_single(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); drops++; } @@ -429,7 +540,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) { - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq = rq->xdpsq; if (xdpsq->mpwqe.wqe) mlx5e_xdp_mpwqe_complete(xdpsq); @@ -444,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) { + sq->xmit_xdp_frame_check = is_mpw ? + mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check; sq->xmit_xdp_frame = is_mpw ? 
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 8b537a4b0840..b90923932668 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -33,17 +33,20 @@ #define __MLX5_EN_XDP_H__ #include "en.h" +#include "en/txrx.h" #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_EMPTY_DS_COUNT \ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) -int mlx5e_xdp_max_mtu(struct mlx5e_params *params); +struct mlx5e_xsk_param; +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); + void *va, u16 *rx_headroom, u32 *len, bool xsk); +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -66,6 +69,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); } +static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { if (sq->doorbell_cseg) { @@ -97,15 +115,14 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) } static inline void -mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; - dma_addr_t dma_addr = xdpi->dma_addr; - struct xdp_frame *xdpf = xdpi->xdpf; struct mlx5_wqe_data_seg *dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; - u16 dma_len = xdpf->len; + u32 dma_len = xdptxd->len; session->pkt_count++; @@ -124,7 +141,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); - memcpy(inline_dseg->data, xdpf->data, dma_len); + memcpy(inline_dseg->data, xdptxd->data, dma_len); session->ds_count += ds_cnt; stats->inlnw++; @@ -132,7 +149,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } no_inline: - dseg->addr = cpu_to_be64(dma_addr); + dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; session->ds_count++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile new file mode 100644 index 000000000000..5ee42991900a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += 
-I$(src)/../..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000000..6a55573ec8f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+{
+ /* Check in advance that we have enough frames, instead of allocating
+ * one-by-one, failing and moving frames to the Reuse Ring.
+ */
+ return xsk_umem_has_addrs_rq(rq->umem, count);
+}
+
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct xdp_umem *umem = rq->umem;
+ u64 handle;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle))
+ return -ENOMEM;
+
+ dma_info->xsk.handle = handle + rq->buff.umem_headroom;
+ dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
+
+ /* No need to add headroom to the DMA address. In the striding RQ case,
+ * we just provide pages for UMR, and headroom is counted at the setup
+ * stage when creating a WQE. In the non-striding RQ case, headroom is
+ * accounted for in mlx5e_alloc_rx_wqe.
+ */
+ dma_info->addr = xdp_umem_get_dma(umem, handle);
+
+ xsk_umem_discard_addr_rq(umem);
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return 0;
+}
+
+static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
+{
+ xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
+}
+
+/* The XSK RQ uses pages from the UMEM; they must not be released. They are
+ * returned to userspace if possible, and if not, this function is called to
+ * reuse them in the driver.
+ */
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
+}
+
+/* Return a frame to the hardware to be filled in again. It is used by XDP
+ * when the XDP program returns XDP_TX, or XDP_REDIRECT to a target other
+ * than an XSKMAP.
+ */
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
+
+ mlx5e_xsk_recycle_frame(rq, handle);
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
+ u32 cqe_bcnt)
+{
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_put_data(skb, data, cqe_bcnt);
+
+ return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx)
+{
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ u32 cqe_bcnt32 = cqe_bcnt;
+ void *va, *data;
+ u32 frag_size;
+ bool consumed;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ /* head_offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * head_offset should always be 0. 
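+ * (One page per frame holds because mlx5e_validate_xsk_param requires
+ * chunk_size == PAGE_SIZE for the UMEM.)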
+ */ + WARN_ON_ONCE(head_offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt32; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true); + rcu_read_unlock(); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + if (likely(consumed)) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct mlx5e_dma_info *di = wi->di; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + void *va, *data; + bool consumed; + u32 frag_size; + + /* wi->offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true); + rcu_read_unlock(); + + if (likely(consumed)) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_put_rx_frag. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000000..307b923a1361 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count); +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000000..aaffa6f68dc0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current + * mlx5e XDP implementation doesn't support multiple packets per page. + */ + if (xsk->chunk_size != PAGE_SIZE) + return false; + + /* Current MTU and XSK headroom don't allow packets to fit the frames. */ + if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. + */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(params, xsk); + } +} + +static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); +} + +static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam) +{ + const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param cparam = {}; + struct dim_cq_moder icocq_moder = {}; + int err; + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + if (unlikely(err)) + return err; + + err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c, 
params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
+ if (unlikely(err))
+ goto err_close_rq;
+
+ /* Create a separate SQ, so that when the UMEM is disabled, we can
+ * close this SQ safely and stop receiving CQEs. Otherwise, e.g., if
+ * the XDPSQ were used instead, we might run into trouble when the UMEM
+ * is disabled and then re-enabled, but the SQ continues receiving CQEs
+ * from the old UMEM.
+ */
+ err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
+ if (unlikely(err))
+ goto err_close_tx_cq;
+
+ err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
+ if (unlikely(err))
+ goto err_close_sq;
+
+ /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
+ * triggered and NAPI to be called on the correct CPU.
+ */
+ err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
+ if (unlikely(err))
+ goto err_close_icocq;
+
+ spin_lock_init(&c->xskicosq_lock);
+
+ set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ return 0;
+
+err_close_icocq:
+ mlx5e_close_cq(&c->xskicosq.cq);
+
+err_close_sq:
+ mlx5e_close_xdpsq(&c->xsksq);
+
+err_close_tx_cq:
+ mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->xskrq.cq);
+
+ return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+ clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_close_rq(&c->xskrq);
+ mlx5e_close_cq(&c->xskrq.cq);
+ mlx5e_close_icosq(&c->xskicosq);
+ mlx5e_close_cq(&c->xskicosq.cq);
+ mlx5e_close_xdpsq(&c->xsksq);
+ mlx5e_close_cq(&c->xsksq.cq);
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ /* TX queue is created active. */
+ mlx5e_trigger_irq(&c->xskicosq);
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+ mlx5e_deactivate_rq(&c->xskrq);
+ /* TX queue is disabled on close. 
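+ * (mlx5e_close_xsk above tears down the XSK SQ together with its CQ,
+ * so no explicit deactivation is needed here.)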
*/ +} + +static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) +{ + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = rqn, + }, + }; + + u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; + + return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); +} + +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) +{ + return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); +} + +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) +{ + return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); +} + +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int err, i; + + if (!priv->xsk.refcnt) + return 0; + + for (i = 0; i < chs->num; i++) { + struct mlx5e_channel *c = chs->c[i]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); + if (unlikely(err)) + goto err_stop; + } + + return 0; + +err_stop: + for (i--; i >= 0; i--) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } + + return err; +} + +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int i; + + if (!priv->xsk.refcnt) + return; + + for (i = 0; i < chs->num; i++) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000000..0dd11b81c046 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000000..35e188cf4ea4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "tx.h" +#include "umem.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock.h> + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + u16 ix; + + if (unlikely(!mlx5e_xdp_is_open(priv))) + return -ENETDOWN; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + c = priv->channels.c[ix]; + + if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock(&c->xskicosq_lock); + mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xdp_umem *umem = sq->umem; + struct mlx5e_xdp_info xdpi; + struct mlx5e_xdp_xmit_data xdptxd; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = sq->xmit_xdp_frame_check(sq); + struct xdp_desc desc; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_umem_consume_tx(umem, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr); + xdptxd.data = xdp_umem_get_data(umem, desc.addr); + xdptxd.len = desc.len; + + dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_umem_consume_tx_done(umem); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000000..7add18bf78d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c new file mode 100644 index 000000000000..4baaa5788320 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/xdp_sock.h> +#include "umem.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + if (unlikely(dma_mapping_error(dev, dma))) + goto err_unmap; + umem->pages[i].dma = dma; + } + + return 0; + +err_unmap: + while (i--) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } + + return -ENOMEM; +} + +static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } +} + +static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +{ + if (!xsk->umems) { + xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->umems), GFP_KERNEL); + if (unlikely(!xsk->umems)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->umems); + xsk->umems = NULL; + } +} + +static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_umems(xsk); + if (unlikely(err)) + return err; + + xsk->umems[ix] = umem; + return 0; +} + +static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->umems[ix] = NULL; + + mlx5e_xsk_put_umems(xsk); +} + +static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +{ + return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = umem->headroom; + xsk->chunk_size = umem->chunk_size_nohr + umem->headroom; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xdp_umem *umem, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + return -EINVAL; + + err = mlx5e_xsk_map_umem(priv, umem); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + if (unlikely(err)) + goto err_unmap_umem; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. 
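+ * (mlx5e_xsk_disable_locked below applies the same rule on teardown:
+ * the XSK RQ and SQ exist only while an XDP program is set.)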
+ */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_remove_umem; + + mlx5e_activate_xsk(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + if (unlikely(err)) + goto err_deactivate; + + return 0; + +err_deactivate: + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +err_remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + +err_unmap_umem: + mlx5e_xsk_unmap_umem(priv, umem); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_umem; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!umem)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_umem; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_umem; + + c = priv->channels.c[ix]; + mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + mlx5e_xsk_unmap_umem(priv, umem); + + return 0; +} + +static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, umem, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + u16 ix; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) : + mlx5e_xsk_disable_umem(priv, ix); +} + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries) +{ + struct xdp_umem_fq_reuse *reuseq; + + reuseq = xsk_reuseq_prepare(nentries); + if (unlikely(!reuseq)) + return -ENOMEM; + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + return 0; +} + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk) +{ + u16 res = xsk->refcnt ? params->num_channels : 0; + + while (res) { + if (mlx5e_xsk_get_umem(params, xsk, res - 1)) + break; + --res; + } + + return res; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h new file mode 100644 index 000000000000..25b4cbe58b54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_UMEM_H__ +#define __MLX5_EN_XSK_UMEM_H__ + +#include "en.h" + +static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->umems) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->umems[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk); + +#endif /* __MLX5_EN_XSK_UMEM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 6da7c88742dc..3022463f2284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -39,6 +39,7 @@ #include "en_accel/ipsec_rxtx.h" #include "en_accel/tls_rxtx.h" #include "en.h" +#include "en/txrx.h" #if IS_ENABLED(CONFIG_GENEVE) static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index ca47c0540904..db84500b024f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -39,6 +39,7 @@ #include <linux/skbuff.h> #include <net/xfrm.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, u32 *cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c new file mode 100644 index 000000000000..d2ff74d52720 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. 
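/* A minimal editorial sketch (not part of this patch) of how the
 * mlx5e_xsk_get_umem helper above is meant to compose with
 * mlx5e_qid_get_ch_if_in_group, following what mlx5e_xsk_setup_umem and
 * mlx5e_xsk_async_xmit do; the helper name is hypothetical:
 *
 * static int xsk_qid_to_umem_sketch(struct mlx5e_priv *priv, u16 qid,
 *				     struct xdp_umem **umem)
 * {
 *	struct mlx5e_params *params = &priv->channels.params;
 *	u16 ix;
 *
 *	// qids in the upper half of the queues form the XSK group
 *	if (!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))
 *		return -EINVAL;
 *
 *	*umem = mlx5e_xsk_get_umem(params, &priv->xsk, ix);
 *	return *umem ? 0 : -ENXIO;
 * }
 */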
+ +#include "en.h" +#include "en_accel/ktls.h" + +static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, tls_en, 1); + + return mlx5e_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX)) + return -EINVAL; + + if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info))) + return -EOPNOTSUPP; + + tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL); + if (!tx_priv) + return -ENOMEM; + + tx_priv->expected_seq = start_offload_tcp_sn; + tx_priv->crypto_info = crypto_info; + mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv); + + /* tc and underlay_qpn values are not in use for tls tis */ + err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn); + if (err) + goto create_tis_fail; + + err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id); + if (err) + goto encryption_key_create_fail; + + mlx5e_ktls_tx_offload_set_pending(tx_priv); + + return 0; + +encryption_key_create_fail: + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); +create_tis_fail: + kvfree(tx_priv); + return err; +} + +static void mlx5e_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv = + mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); + kvfree(tx_priv); +} + +static const struct tlsdev_ops mlx5e_ktls_ops = { + .tls_dev_add = mlx5e_ktls_add, + .tls_dev_del = mlx5e_ktls_del, +}; + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + if (!mlx5_accel_is_ktls_device(priv->mdev)) + return; + + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + + netdev->tlsdev_ops = &mlx5e_ktls_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h new file mode 100644 index 000000000000..407da83474ef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/
+
+#ifndef __MLX5E_KTLS_H__
+#define __MLX5E_KTLS_H__
+
+#include "en.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+#include <net/tls.h>
+#include "accel/tls.h"
+
+#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
+ (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params))
+#define MLX5E_KTLS_STATIC_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB))
+
+#define MLX5E_KTLS_PROGRESS_WQE_SZ \
+ (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params))
+#define MLX5E_KTLS_PROGRESS_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2,
+};
+
+struct mlx5e_ktls_offload_context_tx {
+ struct tls_offload_context_tx *tx_ctx;
+ struct tls_crypto_info *crypto_info;
+ u32 expected_seq;
+ u32 tisn;
+ u32 key_id;
+ bool ctx_post_pending;
+};
+
+struct mlx5e_ktls_offload_context_tx_shadow {
+ struct tls_offload_context_tx tx_ctx;
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+};
+
+static inline void
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ shadow->priv_tx = priv_tx;
+ priv_tx->tx_ctx = tx_ctx;
+}
+
+static inline struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ return shadow->priv_tx;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe, u16 *pi);
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ struct mlx5e_sq_dma *dma);
+
+#else
+
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+#endif
+
+#endif /* __MLX5E_KTLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
new file mode 100644
index 000000000000..5c08891806f0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies. 
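/* An editorial sketch (not part of this patch) of the WQEBB sizing math
 * behind the MLX5E_KTLS_*_WQEBBS macros in ktls.h above; 64 is the value
 * of MLX5_SEND_WQE_BB, the WQE size in the example is made up:
 *
 *	// a 160-byte static-params UMR WQE needs ceil(160 / 64) = 3 WQEBBs,
 *	// which is what DIV_ROUND_UP(wqe_sz, MLX5_SEND_WQE_BB) computes
 *	u16 wqebbs = DIV_ROUND_UP(160, 64);	// == 3
 */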
+ +#include <linux/tls.h> +#include "en.h" +#include "en/txrx.h" +#include "en_accel/ktls.h" + +enum { + MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2, +}; + +enum { + MLX5E_ENCRYPTION_STANDARD_TLS = 0x1, +}; + +#define EXTRACT_INFO_FIELDS do { \ + salt = info->salt; \ + rec_seq = info->rec_seq; \ + salt_sz = sizeof(info->salt); \ + rec_seq_sz = sizeof(info->rec_seq); \ +} while (0) + +static void +fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + char *initial_rn, *gcm_iv; + u16 salt_sz, rec_seq_sz; + char *salt, *rec_seq; + u8 tls_version; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + EXTRACT_INFO_FIELDS; + break; + } + default: + WARN_ON(1); + return; + } + + gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); + initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); + + memcpy(gcm_iv, salt, salt_sz); + memcpy(initial_rn, rec_seq, rec_seq_sz); + + tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2; + + MLX5_SET(tls_static_params, ctx, tls_version, tls_version); + MLX5_SET(tls_static_params, ctx, const_1, 1); + MLX5_SET(tls_static_params, ctx, const_2, 2); + MLX5_SET(tls_static_params, ctx, encryption_standard, + MLX5E_ENCRYPTION_STANDARD_TLS); + MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id); +} + +static void +build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + +#define STATIC_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | + (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + STATIC_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + ucseg->flags = MLX5_UMR_INLINE; + ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); + + fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx); +} + +static void +fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn); + MLX5_SET(tls_progress_params, ctx, record_tracker_state, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); + MLX5_SET(tls_progress_params, ctx, auth_state, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD); +} + +static void +build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + +#define PROGRESS_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = + cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | + (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + PROGRESS_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? 
MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + fill_progress_params_ctx(wqe->data, priv_tx); +} + +static void tx_fill_wi(struct mlx5e_txqsq *sq, + u16 pi, u8 num_wqebbs, + skb_frag_t *resync_dump_frag) +{ + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + wi->skb = NULL; + wi->num_wqebbs = num_wqebbs; + wi->resync_dump_frag = resync_dump_frag; +} + +void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + priv_tx->ctx_post_pending = true; +} + +static bool +mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + bool ret = priv_tx->ctx_post_pending; + + priv_tx->ctx_post_pending = false; + + return ret; +} + +static void +post_static_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_umr_wqe *umr_wqe; + u16 pi; + + umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi); + build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_STATIC_WQEBBS; +} + +static void +post_progress_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_tx_wqe *wqe; + u16 pi; + + wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi); + build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS; +} + +static void +mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool skip_static_post, bool fence_first_post) +{ + bool progress_fence = skip_static_post || !fence_first_post; + + if (!skip_static_post) + post_static_params(sq, priv_tx, fence_first_post); + + post_progress_params(sq, priv_tx, progress_fence); +} + +struct tx_sync_info { + u64 rcd_sn; + s32 sync_len; + int nr_frags; + skb_frag_t *frags[MAX_SKB_FRAGS]; +}; + +static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + u32 tcp_seq, struct tx_sync_info *info) +{ + struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + struct tls_record_info *record; + int remaining, i = 0; + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&tx_ctx->lock, flags); + record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); + + if (unlikely(!record)) { + ret = false; + goto out; + } + + if (unlikely(tcp_seq < tls_record_start_seq(record))) { + if (!tls_record_is_start_marker(record)) + ret = false; + goto out; + } + + info->sync_len = tcp_seq - tls_record_start_seq(record); + remaining = info->sync_len; + while (remaining > 0) { + skb_frag_t *frag = &record->frags[i]; + + __skb_frag_ref(frag); + remaining -= skb_frag_size(frag); + info->frags[i++] = frag; + } + /* reduce the part which will be sent with the original SKB */ + if (remaining < 0) + skb_frag_size_add(info->frags[i - 1], remaining); + info->nr_frags = i; +out: + spin_unlock_irqrestore(&tx_ctx->lock, flags); + return ret; +} + +static void +tx_post_resync_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + u64 rcd_sn) +{ + struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + __be64 rn_be = cpu_to_be64(rcd_sn); + bool skip_static_post; + u16 rec_seq_sz; + char *rec_seq; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + rec_seq = info->rec_seq; + rec_seq_sz = 
sizeof(info->rec_seq); + break; + } + default: + WARN_ON(1); + return; + } + + skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz); + if (!skip_static_post) + memcpy(rec_seq, &rn_be, rec_seq_sz); + + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true); +} + +static int +tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, + skb_frag_t *frag, u32 tisn, bool first) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe *wqe; + dma_addr_t dma_addr = 0; + u16 ds_cnt, ds_cnt_inl; + u8 num_wqebbs; + u16 pi, ihs; + int fsz; + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + ds_cnt += 1; /* one frag */ + + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->imm = cpu_to_be32(tisn); + cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + eseg->inline_hdr.sz = cpu_to_be16(ihs); + memcpy(eseg->inline_hdr.start, skb->data, ihs); + dseg += ds_cnt_inl; + + fsz = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + return -ENOMEM; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + + tx_fill_wi(sq, pi, num_wqebbs, frag); + sq->pc += num_wqebbs; + + WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS, + "unexpected DUMP num_wqebbs, %d > %d", + num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS); + + return 0; +} + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + struct mlx5e_sq_dma *dma) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + mlx5e_tx_dma_unmap(sq->pdev, dma); + __skb_frag_unref(wi->resync_dump_frag); + stats->tls_dump_packets++; + stats->tls_dump_bytes += wi->num_bytes; +} + +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + +static struct sk_buff * +mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + u32 seq) +{ + struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5_wq_cyc *wq = &sq->wq; + struct tx_sync_info info = {}; + u16 contig_wqebbs_room, pi; + u8 num_wqebbs; + int i; + + if (!tx_sync_info_get(priv_tx, seq, &info)) { + /* We might get here if a retransmission reaches the driver + * after the relevant record is acked. + * It should be safe to drop the packet in this case + */ + stats->tls_drop_no_sync_data++; + goto err_out; + } + + if (unlikely(info.sync_len < 0)) { + u32 payload; + int headln; + + headln = skb_transport_offset(skb) + tcp_hdrlen(skb); + payload = skb->len - headln; + if (likely(payload <= -info.sync_len)) + return skb; + + stats->tls_drop_bypass_req++; + goto err_out; + } + + stats->tls_ooo++; + + num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + + (info.nr_frags ? 
info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + + tx_post_resync_params(sq, priv_tx, info.rcd_sn); + + for (i = 0; i < info.nr_frags; i++) + if (tx_post_resync_dump(sq, skb, info.frags[i], + priv_tx->tisn, !i)) + goto err_out; + + /* If no dump WQE was sent, we need to have a fence NOP WQE before the + * actual data xmit. + */ + if (!info.nr_frags) + tx_post_fence_nop(sq); + + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} + +struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, u16 *pi) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5_wqe_ctrl_seg *cseg; + struct tls_context *tls_ctx; + int datalen; + u32 seq; + + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != netdev)) + goto err_out; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + stats->tls_ctx++; + } + + seq = ntohl(tcp_hdr(skb)->seq); + if (unlikely(priv_tx->expected_seq != seq)) { + skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq); + if (unlikely(!skb)) + goto out; + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + } + + priv_tx->expected_seq = seq + datalen; + + cseg = &(*wqe)->ctrl; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + stats->tls_encrypted_packets += skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; + stats->tls_encrypted_bytes += datalen; + +out: + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index e88340e196f7..fba561ffe1d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -160,25 +160,31 @@ static void mlx5e_tls_del(struct net_device *netdev, direction == TLS_OFFLOAD_CTX_DIR_TX); } -static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk, - u32 seq, u64 rcd_sn) +static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, + u32 seq, u8 *rcd_sn_data, + enum tls_offload_ctx_dir direction) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_tls_offload_context_rx *rx_ctx; + u64 rcd_sn = *(u64 *)rcd_sn_data; + if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) + return -EINVAL; rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, be64_to_cpu(rcd_sn)); mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn); atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply); + + return 0; } static const struct tlsdev_ops mlx5e_tls_ops = { .tls_dev_add = mlx5e_tls_add, .tls_dev_del = mlx5e_tls_del, - .tls_dev_resync_rx = mlx5e_tls_resync_rx, + .tls_dev_resync = mlx5e_tls_resync, }; void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) @@ -186,6 +192,11 @@ void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) struct net_device *netdev = priv->netdev; u32 caps; + if (mlx5_accel_is_ktls_device(priv->mdev)) { + mlx5e_ktls_build_netdev(priv); + return; + } + if (!mlx5_accel_is_tls_device(priv->mdev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index 3f5d72163b56..9015f3f7792d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -33,8 +33,10 @@ #ifndef __MLX5E_TLS_H__ #define __MLX5E_TLS_H__ -#ifdef CONFIG_MLX5_EN_TLS +#include "accel/tls.h" +#include "en_accel/ktls.h" +#ifdef CONFIG_MLX5_EN_TLS #include <net/tls.h> #include "en.h" @@ -94,7 +96,12 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); #else -static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { } +static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) +{ + if (mlx5_accel_is_ktls_device(priv->mdev)) + mlx5e_ktls_build_netdev(priv); +} + static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 439bf5953885..71384ad1a443 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -248,7 +248,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, cpu_to_be64(info.rcd_sn)); mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true); - mlx5e_sq_fetch_wqe(sq, wqe, pi); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); return skb; err_out: @@ -269,6 +269,11 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device 
*netdev, int datalen; u32 skb_seq; + if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { + skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); + goto out; + } + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 311667ec71b8..90bc1f2384c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -38,6 +38,7 @@ #include <linux/skbuff.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 554672edf8c3..8dd31b5c740c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -680,7 +680,7 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, memset(perm_addr, 0xff, MAX_ADDR_LEN); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr); + mlx5_query_mac_address(priv->mdev, perm_addr); } static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index d67adf70a97b..ca9cfbf57d8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,22 +30,22 @@ * SOFTWARE. */ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "en.h" static void -mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); @@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work) void mlx5e_tx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index dd764e0471f2..126ec4181286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "en/port.h" +#include "en/xsk/umem.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -46,7 +47,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - 
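The en_dim.c hunk above is a mechanical rename (net_dim to the generic dim library), but the callback pattern it preserves is worth spelling out: the work handler recovers its dim context with container_of, programs the CQ with the profile the algorithm selected, and re-arms measurement by resetting the state. A condensed, compilable sketch of that shape, assuming a mocked-up queue and a made-up profile table (the real driver goes through mlx5_core_modify_cq_moderation and a workqueue):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

enum dim_state { DIM_START_MEASURE, DIM_MEASURE_IN_PROGRESS };

struct dim_cq_moder { unsigned short usec, pkts; };
struct dim { enum dim_state state; unsigned char profile_ix; };

/* Hypothetical RX queue embedding its DIM context, as mlx5e_rq embeds dim. */
struct mock_rq { int cqn; struct dim dim; };

static const struct dim_cq_moder rx_profiles[] = {
	{ 1, 2 }, { 8, 64 }, { 64, 128 }, /* illustrative values only */
};

/* Stands in for the firmware command that updates CQ moderation. */
static void modify_cq_moderation(int cqn, struct dim_cq_moder m)
{
	printf("cq %d: usec=%u pkts=%u\n", cqn, m.usec, m.pkts);
}

/* The work callback: apply the chosen profile, then restart measurement. */
static void rx_dim_work(struct dim *dim)
{
	struct mock_rq *rq = container_of(dim, struct mock_rq, dim);

	modify_cq_moderation(rq->cqn, rx_profiles[dim->profile_ix]);
	dim->state = DIM_START_MEASURE;
}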
strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + strlcpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } @@ -388,8 +389,17 @@ static int mlx5e_set_ringparam(struct net_device *dev, void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + mutex_lock(&priv->state_lock); + ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev); ch->combined_count = priv->channels.params.num_channels; + if (priv->xsk.refcnt) { + /* The upper half are XSK queues. */ + ch->max_combined *= 2; + ch->combined_count *= 2; + } + + mutex_unlock(&priv->state_lock); } static void mlx5e_get_channels(struct net_device *dev, @@ -403,6 +413,7 @@ static void mlx5e_get_channels(struct net_device *dev, int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + struct mlx5e_params *cur_params = &priv->channels.params; unsigned int count = ch->combined_count; struct mlx5e_channels new_channels = {}; bool arfs_enabled; @@ -414,16 +425,26 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, return -EINVAL; } - if (priv->channels.params.num_channels == count) + if (cur_params->num_channels == count) return 0; mutex_lock(&priv->state_lock); + /* Don't allow changing the number of channels if there is an active + * XSK, because the numbering of the XSK and regular RQs will change. + */ + if (priv->xsk.refcnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; + *cur_params = new_channels.params; if (!netif_is_rxfh_configured(priv->netdev)) mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); @@ -466,7 +487,7 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -521,7 +542,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -1867,40 +1888,6 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct net_device *dev = priv->netdev; - const struct firmware *fw; - int err; - - if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) - return -EOPNOTSUPP; - - err = request_firmware_direct(&fw, flash->data, &dev->dev); - if (err) - return err; - - dev_hold(dev); - rtnl_unlock(); - - err = mlx5_firmware_flash(mdev, fw); - release_firmware(fw); - - rtnl_lock(); - dev_put(dev); - return err; -} - -static int mlx5e_flash_device(struct net_device *dev, - struct ethtool_flash *flash) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - return mlx5e_ethtool_flash_device(priv, flash); -} - #ifndef CONFIG_MLX5_EN_RXNFC /* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS * otherwise this function
will be defined from en_fs_ethtool.c @@ -1939,7 +1926,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { #ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, #endif - .flash_device = mlx5e_flash_device, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 4421c10f58ae..ea3a490b569a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -32,6 +32,8 @@ #include <linux/mlx5/fs.h> #include "en.h" +#include "en/params.h" +#include "en/xsk/umem.h" struct mlx5e_ethtool_rule { struct list_head list; @@ -414,6 +416,14 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, if (fs->ring_cookie == RX_CLS_FLOW_DISC) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { + struct mlx5e_params *params = &priv->channels.params; + enum mlx5e_rq_group group; + struct mlx5e_tir *tir; + u16 ix; + + mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); + tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir; + dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { err = -ENOMEM; @@ -421,12 +431,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + dst->tir_num = tir[ix].tirn; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -600,9 +610,9 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -ENOSPC; - if (fs->ring_cookie >= priv->channels.params.num_channels && - fs->ring_cookie != RX_CLS_FLOW_DISC) - return -EINVAL; + if (fs->ring_cookie != RX_CLS_FLOW_DISC) + if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie)) + return -EINVAL; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case ETHER_FLOW: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a8e8350b38aa..6d0ae87c8ded 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -38,8 +38,10 @@ #include <linux/bpf.h> #include <linux/if_bridge.h> #include <net/page_pool.h> +#include <net/xdp_sock.h> #include "eswitch.h" #include "en.h" +#include "en/txrx.h" #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" @@ -56,35 +58,11 @@ #include "en/monitor_stats.h" #include "en/reporter.h" #include "en/params.h" +#include "en/xsk/umem.h" +#include "en/xsk/setup.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" -struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; - struct mlx5e_rq_frags_info frags_info; -}; - -struct mlx5e_sq_param { - u32 sqc[MLX5_ST_SZ_DW(sqc)]; - struct mlx5_wq_param wq; - bool is_mpw; -}; - -struct mlx5e_cq_param { - u32 cqc[MLX5_ST_SZ_DW(cqc)]; - struct mlx5_wq_param wq; - u16 eq_ix; - u8 cq_period_mode; -}; - -struct mlx5e_channel_param { - struct mlx5e_rq_param rq; - struct mlx5e_sq_param sq; - struct mlx5e_sq_param xdp_sq; - struct mlx5e_sq_param icosq; - struct mlx5e_cq_param rx_cq; - struct mlx5e_cq_param tx_cq; 
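The ethtool hunks above rely on a queue-numbering convention: with AF_XDP enabled, qids [0, num_channels) address the regular RQs and the upper half addresses the XSK RQs, which is why get_channels doubles both counts and why the flow-steering path decodes fs->ring_cookie via mlx5e_qid_get_ch_and_group before choosing between direct_tir and xsk_tir. A small sketch of that decode and validation under the upper-half assumption (the helper names here are hypothetical, not the driver's):

#include <stdbool.h>
#include <stdint.h>

enum rq_group { RQ_GROUP_REGULAR, RQ_GROUP_XSK, NUM_RQ_GROUPS };

/* Decode a flat queue id into (channel index, RQ group). */
static void qid_get_ch_and_group(uint16_t num_channels, uint64_t qid,
				 uint16_t *ix, enum rq_group *group)
{
	*ix = (uint16_t)(qid % num_channels);
	*group = (enum rq_group)(qid / num_channels);
}

/* A qid is valid if it falls inside one of the NUM_RQ_GROUPS halves. */
static bool qid_validate(uint16_t num_channels, uint64_t qid)
{
	return qid < (uint64_t)num_channels * NUM_RQ_GROUPS;
}

With 8 channels, qid 3 decodes to channel 3 in the regular group, while qid 11 decodes to channel 3 in the XSK group; this is also why changing num_channels is refused while an XSK is active.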
- struct mlx5e_cq_param icosq_cq; -}; bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -114,18 +92,31 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params)) : + BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - return mlx5e_check_fragmented_striding_rq_cap(mdev) && - !MLX5_IPSEC_DEV(mdev) && - !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params)); + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return false; + + if (MLX5_IPSEC_DEV(mdev)) + return false; + + if (params->xdp_prog) { + /* XSK params are not considered here. If striding RQ is in use, + * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will + * be called with the known XSK params. + */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return false; + } + + return true; } void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -394,6 +385,8 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { @@ -401,6 +394,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 num_xsk_frames = 0; + u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; @@ -417,7 +412,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->ix = c->ix; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->xdpsq = &c->rq_xdpsq; + rq->umem = umem; + + if (rq->umem) + rq->stats = &c->priv->channel_stats[c->ix].xskrq; + else + rq->stats = &c->priv->channel_stats[c->ix].rq; rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { @@ -426,12 +427,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix); + rq_xdp_ix = rq->ix; + if (xsk) + rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; + err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix); if (err < 0) goto err_rq_wq_destroy; rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + rq->buff.umem_headroom = xsk ? 
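The rewritten mlx5e_striding_rq_possible above replaces one dense boolean with ordered early returns: hardware capability first, then the IPSec exclusion, then the XDP requirement that MPWQE RX produce linear SKBs (XSK params deliberately left out at this stage, per the comment). The same decision structure as a standalone predicate, with the capabilities reduced to mocked flags and the linearity check stubbed:

#include <stdbool.h>

struct caps { bool striding_rq; bool ipsec; };
struct params { bool xdp_prog; };

/* Placeholder for mlx5e_rx_mpwqe_is_linear_skb(); always-true stub here. */
static bool mpwqe_rx_is_linear(const struct params *p)
{
	(void)p;
	return true;
}

static bool striding_rq_possible(const struct caps *c, const struct params *p)
{
	if (!c->striding_rq)
		return false;			/* no HW support */
	if (c->ipsec)
		return false;			/* IPSec offload excluded */
	if (p->xdp_prog && !mpwqe_rx_is_linear(p))
		return false;			/* XDP needs linear MPWQE SKBs */
	return true;
}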
xsk->headroom : 0; pool_size = 1 << params->log_rq_mtu_frames; switch (rq->wq_type) { @@ -445,7 +450,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params); + if (xsk) + num_xsk_frames = wq_sz << + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + + pool_size = MLX5_MPWRQ_PAGES_PER_WQE << + mlx5e_mpwqe_get_log_rq_size(params, xsk); rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -464,12 +474,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.skb_from_cqe_mpwrq = - mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ? - mlx5e_skb_from_cqe_mpwrq_linear : - mlx5e_skb_from_cqe_mpwrq_nonlinear; - rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); - rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); + rq->mpwqe.skb_from_cqe_mpwrq = xsk ? + mlx5e_xsk_skb_from_cqe_mpwrq_linear : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ? + mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.num_strides = + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -490,6 +503,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + if (xsk) + num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags; + rq->wqe.info = rqp->frags_info; rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), @@ -503,6 +519,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5e_init_di_list(rq, wq_sz, c->cpu); if (err) goto err_free; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; @@ -518,33 +535,49 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_free; } - rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params) ? - mlx5e_skb_from_cqe_linear : - mlx5e_skb_from_cqe_nonlinear; + rq->wqe.skb_from_cqe = xsk ? + mlx5e_xsk_skb_from_cqe_linear : + mlx5e_rx_is_linear_skb(params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; rq->mkey_be = c->mkey_be; } - /* Create a page_pool and register it with rxq */ - pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ - pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; - pp_params.dma_dir = rq->buff.map_dir; - - /* page_pool can be used even when there is no rq->xdp_prog, - * given page_pool does not handle DMA mapping there is no - * required state to clear. And page_pool gracefully handle - * elevated refcnt. 
- */ - rq->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rq->page_pool)) { - err = PTR_ERR(rq->page_pool); - rq->page_pool = NULL; - goto err_free; + if (xsk) { + err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames); + if (unlikely(err)) { + mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n", + num_xsk_frames); + goto err_free; + } + + rq->zca.free = mlx5e_xsk_zca_free; + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &rq->zca); + } else { + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handles + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free; + } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); } - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_POOL, rq->page_pool); if (err) goto err_free; @@ -584,11 +617,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, switch (params->rx_cq_moderation.cq_period_mode) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: default: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } rq->page_cache.head = 0; @@ -611,8 +644,7 @@ err_rq_wq_destroy: if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; @@ -625,10 +657,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); - xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); - switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); @@ -643,8 +671,15 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; - mlx5e_page_release(rq, dma_info, false); + /* With AF_XDP, page_cache is not used, so this loop is not + * entered, and it's safe to call mlx5e_page_release_dynamic + * directly.
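The allocation path above now registers one of two XDP memory models on the RxQ: zero-copy backed by the UMEM, with a driver-supplied zca.free recycler, when an XSK is attached; a page_pool otherwise. A reduced sketch of the branch, with the registration call stubbed out (the real call is xdp_rxq_info_reg_mem_model with MEM_TYPE_ZERO_COPY or MEM_TYPE_PAGE_POOL; everything else here is a mock):

#include <stdbool.h>

enum mem_type { MEM_TYPE_PAGE_POOL_, MEM_TYPE_ZERO_COPY_ };

struct zero_copy_allocator { void (*free)(void *handle); };
struct rxq { enum mem_type mt; void *allocator; };

/* Stub for xdp_rxq_info_reg_mem_model(): record which model backs the RxQ. */
static int reg_mem_model(struct rxq *q, enum mem_type mt, void *allocator)
{
	q->mt = mt;
	q->allocator = allocator;
	return 0;
}

static int setup_rx_memory(struct rxq *q, bool xsk,
			   struct zero_copy_allocator *zca, void *page_pool)
{
	if (xsk)
		/* AF_XDP: frames come from the UMEM; zca->free recycles them. */
		return reg_mem_model(q, MEM_TYPE_ZERO_COPY_, zca);

	/* Default: pages are allocated from and recycled to the page_pool. */
	return reg_mem_model(q, MEM_TYPE_PAGE_POOL_, page_pool);
}

This split also explains the reuse-ring resize: the zero-copy recycler needs room for every in-flight frame, so the XSK branch sizes it to num_xsk_frames before registering.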
+ */ + mlx5e_page_release_dynamic(rq, dma_info, false); } + + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); } @@ -778,7 +813,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_core_destroy_rq(rq->mdev, rq->rqn); } -static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) { unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); struct mlx5e_channel *c = rq->channel; @@ -836,14 +871,13 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -static int mlx5e_open_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq) { int err; - err = mlx5e_alloc_rq(c, params, param, rq); + err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq); if (err) return err; @@ -881,13 +915,13 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) mlx5e_trigger_irq(&rq->channel->icosq); } -static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); @@ -940,6 +974,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct xdp_umem *umem, struct mlx5e_sq_param *param, struct mlx5e_xdpsq *sq, bool is_redirect) @@ -955,9 +990,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + sq->umem = umem; + + sq->stats = sq->umem ? + &c->priv->channel_stats[c->ix].xsksq : + is_redirect ? 
+ &c->priv->channel_stats[c->ix].xdpsq : + &c->priv->channel_stats[c->ix].rq_xdpsq; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1087,11 +1126,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - if (mlx5_accel_is_tls_device(c->priv->mdev)) + if (mlx5_accel_is_tls_device(c->priv->mdev)) { set_bit(MLX5E_SQ_STATE_TLS, &sq->state); + sq->stop_room += MLX5E_SQ_TLS_ROOM; + } param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1337,10 +1379,8 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_tx_reporter_err_cqe(sq); } -static int mlx5e_open_icosq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) { struct mlx5e_create_sq_param csp = {}; int err; @@ -1366,7 +1406,7 @@ err_free_icosq: return err; } -static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1377,16 +1417,14 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq) mlx5e_free_icosq(sq); } -static int mlx5e_open_xdpsq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_xdpsq *sq, - bool is_redirect) +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); + err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect); if (err) return err; @@ -1440,7 +1478,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1448,7 +1486,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq, rq); + mlx5e_free_xdpsq_descs(sq); mlx5e_free_xdpsq(sq); } @@ -1518,6 +1556,7 @@ static void mlx5e_free_cq(struct mlx5e_cq *cq) static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_core_dev *mdev = cq->mdev; struct mlx5_core_cq *mcq = &cq->mcq; @@ -1552,7 +1591,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); - err = mlx5_core_create_cq(mdev, mcq, in, inlen); + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); kvfree(in); @@ -1569,10 +1608,8 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq) mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } -static int mlx5e_open_cq(struct mlx5e_channel *c, - struct net_dim_cq_moder moder, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param 
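The txqsq changes above make the stop threshold explicit: stop_room starts at the base reserve and grows by a TLS allowance when the device offloads TLS, since a resync can emit extra WQEs per packet. A sketch of that accounting with placeholder constants (the real MLX5E_SQ_STOP_ROOM and MLX5E_SQ_TLS_ROOM are derived from worst-case WQE sizes):

#include <stdbool.h>
#include <stdint.h>

#define SQ_STOP_ROOM 64u  /* placeholder: room for one maximal WQE + threshold */
#define SQ_TLS_ROOM  32u  /* placeholder: extra room for TLS resync WQEs */

struct txqsq { uint32_t stop_room; bool tls; };

static void init_stop_room(struct txqsq *sq, bool tls_device)
{
	sq->stop_room = SQ_STOP_ROOM;
	if (tls_device) {
		sq->tls = true;
		sq->stop_room += SQ_TLS_ROOM;	/* worst case per packet */
	}
}

/* The SQ is stopped when fewer than stop_room WQEBBs remain free. */
static bool sq_should_stop(uint32_t free_wqebbs, const struct txqsq *sq)
{
	return free_wqebbs < sq->stop_room;
}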
*param, struct mlx5e_cq *cq) { struct mlx5_core_dev *mdev = c->mdev; int err; @@ -1595,7 +1632,7 @@ err_free_cq: return err; } -static void mlx5e_close_cq(struct mlx5e_cq *cq) +void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_destroy_cq(cq); mlx5e_free_cq(cq); @@ -1769,49 +1806,16 @@ static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) free_cpumask_var(c->xps_cpumask); } -static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, - struct mlx5e_channel **cp) +static int mlx5e_open_queues(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); - struct net_dim_cq_moder icocq_moder = {0, 0}; - struct net_device *netdev = priv->netdev; - struct mlx5e_channel *c; - unsigned int irq; + struct dim_cq_moder icocq_moder = {0, 0}; int err; - int eqn; - - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); - if (err) - return err; - - c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; - - c->priv = priv; - c->mdev = priv->mdev; - c->tstamp = &priv->tstamp; - c->ix = ix; - c->cpu = cpu; - c->pdev = priv->mdev->device; - c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = params->num_tc; - c->xdp = !!params->xdp_prog; - c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); - - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) - goto err_napi_del; + return err; err = mlx5e_open_tx_cqs(c, params, cparam); if (err) @@ -1827,7 +1831,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, /* XDP SQ CQ params are same as normal TXQ sq CQ params */ err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation, - &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; + &cparam->tx_cq, &c->rq_xdpsq.cq) : 0; if (err) goto err_close_rx_cq; @@ -1841,20 +1845,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_icosq; - err = c->xdp ? 
mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0; - if (err) - goto err_close_sqs; + if (c->xdp) { + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, + &c->rq_xdpsq, false); + if (err) + goto err_close_sqs; + } - err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); if (err) goto err_close_xdp_sq; - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); if (err) goto err_close_rq; - *cp = c; - return 0; err_close_rq: @@ -1862,7 +1867,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); + mlx5e_close_xdpsq(&c->rq_xdpsq); err_close_sqs: mlx5e_close_sqs(c); @@ -1872,8 +1877,9 @@ err_close_icosq: err_disable_napi: napi_disable(&c->napi); + if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); + mlx5e_close_cq(&c->rq_xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1887,6 +1893,85 @@ err_close_tx_cqs: err_close_icosq_cq: mlx5e_close_cq(&c->icosq.cq); + return err; +} + +static void mlx5e_close_queues(struct mlx5e_channel *c) +{ + mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct xdp_umem *umem, + struct mlx5e_channel **cp) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + struct net_device *netdev = priv->netdev; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + unsigned int irq; + int err; + int eqn; + + err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = priv->mdev->device; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix].ch; + c->irq_desc = irq_to_desc(irq); + + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_queues(c, params, cparam); + if (unlikely(err)) + goto err_napi_del; + + if (umem) { + mlx5e_build_xsk_param(umem, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_close_queues; + } + + *cp = c; + + return 0; + +err_close_queues: + mlx5e_close_queues(c); + err_napi_del: netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1905,12 +1990,18 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_activate_xsk(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) { int tc; + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_deactivate_xsk(c); + mlx5e_deactivate_rq(&c->rq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); @@ -1918,19 +2009,9 @@ 
static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq, NULL); - mlx5e_close_rq(&c->rq); - if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); - mlx5e_close_sqs(c); - mlx5e_close_icosq(&c->icosq); - napi_disable(&c->napi); - if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); - mlx5e_close_cq(&c->rq.cq); - mlx5e_close_cq(&c->xdpsq.cq); - mlx5e_close_tx_cqs(c); - mlx5e_close_cq(&c->icosq.cq); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_close_xsk(c); + mlx5e_close_queues(c); netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1941,6 +2022,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); @@ -1953,10 +2035,10 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, byte_count += MLX5E_METADATA_ETHER_LEN; #endif - if (mlx5e_rx_is_linear_skb(params)) { + if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_frag_sz(params); + frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); frag_stride = roundup_pow_of_two(frag_stride); info->arr[0].frag_size = byte_count; @@ -2014,9 +2096,10 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc) return MLX5_GET(wq, wq, log_wq_sz); } -static void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_rq_param *param) +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; @@ -2026,16 +2109,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params) - + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params) - + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); break; default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, ¶m->frags_info); + mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); ndsegs = param->frags_info.num_frags; } @@ -2066,8 +2149,8 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mdev->device); } -static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2103,9 +2186,10 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } -static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) { struct mlx5_core_dev 
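mlx5e_build_rq_frags_info above sizes the linear legacy-RQ case by rounding the linear fragment size up to a power of two for the stride, so consecutive buffers start at aligned offsets, while frag_size keeps the MTU-derived byte count the hardware may write. A standalone sketch of that step (the headroom and linear-size inputs are simplified stand-ins for mlx5e_rx_get_linear_frag_sz):

#include <stdint.h>

/* Round v up to the next power of two (v > 0). */
static uint32_t roundup_pow_of_two32(uint32_t v)
{
	v--;
	v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
	return v + 1;
}

/* Linear RX: one fragment per packet, stride aligned to a power of two. */
static void build_linear_frag(uint32_t hw_mtu, uint32_t headroom,
			      uint32_t *frag_size, uint32_t *frag_stride)
{
	uint32_t linear_sz = headroom + hw_mtu; /* simplified linear frag size */

	*frag_size = hw_mtu;				/* writable bytes */
	*frag_stride = roundup_pow_of_two32(linear_sz);	/* buffer step */
}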
*mdev = priv->mdev; void *cqc = param->cqc; @@ -2113,8 +2197,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + - mlx5e_mpwqe_get_log_num_strides(mdev, params); + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2130,9 +2214,9 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } -static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2142,9 +2226,9 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; } -static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2152,12 +2236,12 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } -static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2168,9 +2252,9 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); } -static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2198,14 +2282,14 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, { u8 icosq_log_wq_sz; - mlx5e_build_rq_param(priv, params, &cparam->rq); + mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); mlx5e_build_sq_param(priv, params, &cparam->sq); mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } @@ -2226,7 +2310,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, mlx5e_build_channel_param(priv, &chs->params, cparam); for (i = 0; i < chs->num; i++) { - err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); + struct xdp_umem *umem = NULL; + + if (chs->params.xdp_prog) + umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i); + + err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]); if (err) goto err_close_channels; } @@ -2268,6 +2357,10 @@ static int mlx5e_wait_channels_min_rx_wqes(struct 
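The RX CQ sizing above follows from one CQE per stride: for striding RQ the log CQ size is the sum of the log RQ size and the log strides per WQE, and the XSK params now flow into the same helpers so XSK CQs are sized consistently. In sketch form:

#include <stdint.h>
#include <stdbool.h>

/* CQEs needed: one per stride with striding RQ, one per frame otherwise.
 * e.g. 2^4 WQEs x 2^11 strides per WQE -> a 2^15-entry CQ. */
static uint8_t rx_log_cq_size(bool striding_rq, uint8_t log_rq_size,
			      uint8_t log_num_strides, uint8_t log_mtu_frames)
{
	if (striding_rq)
		return log_rq_size + log_num_strides;
	return log_mtu_frames;
}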
mlx5e_channels *chs) int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout); + + /* Don't wait on the XSK RQ, because the newer xdpsock sample + * doesn't provide any Fill Ring entries at the setup stage. + */ } return err ? -ETIMEDOUT : 0; @@ -2340,35 +2433,35 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - struct mlx5e_rqt *rqt; + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int err; int ix; - for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { - rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, rqt); - if (err) + for (ix = 0; ix < max_nch; ix++) { + err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); + if (unlikely(err)) goto err_destroy_rqts; } return 0; err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); + mlx5e_destroy_rqt(priv, &tirs[ix].rqt); return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_rqt(priv, &tirs[i].rqt); } static int mlx5e_rx_hash_fn(int hfunc) @@ -2788,11 +2881,12 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; + int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS; struct net_device *netdev = priv->netdev; mlx5e_netdev_set_tcs(netdev); netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->channels.num); + netif_set_real_num_rx_queues(netdev, num_rxqs); mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); @@ -2804,10 +2898,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_wait_channels_min_rx_wqes(&priv->channels); mlx5e_redirect_rqts_to_channels(priv, &priv->channels); + + mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { + mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); + mlx5e_redirect_rqts_to_drop(priv); if (mlx5e_is_vport_rep(priv)) @@ -2847,7 +2945,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, if (hw_modify) hw_modify(priv); - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); /* return carrier back if needed */ @@ -2886,15 +2984,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + bool is_xdp = priv->channels.params.xdp_prog; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); + if (is_xdp) + mlx5e_xdp_set_open(priv); err = mlx5e_open_channels(priv, &priv->channels); if (err) goto err_clear_state_opened_flag; - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); @@ 
-2903,6 +3004,8 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; err_clear_state_opened_flag: + if (is_xdp) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; } @@ -2934,6 +3037,8 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); @@ -3045,20 +3150,19 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) mlx5e_free_cq(&drop_rq->cq); } -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn) +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + if (MLX5_GET(tisc, tisc, tls_en)) + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); - return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); + return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn); } void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) @@ -3072,7 +3176,14 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) { - err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, prio, tc << 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]); if (err) goto err_close_tises; } @@ -3190,13 +3301,13 @@ err_destroy_inner_tirs: return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); struct mlx5e_tir *tir; void *tirc; int inlen; - int err; + int err = 0; u32 *in; int ix; @@ -3205,25 +3316,24 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < max_nch; ix++) { memset(in, 0, inlen); - tir = &priv->direct_tir[ix]; + tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); + mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) + if (unlikely(err)) goto err_destroy_ch_tirs; } - kvfree(in); - - return 0; + goto out; err_destroy_ch_tirs: - mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); + mlx5e_destroy_tir(priv->mdev, &tirs[ix]); +out: kvfree(in); return err; @@ -3243,13 +3353,13 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i 
< nch; i++) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_tir(priv->mdev, &tirs[i]); } static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) @@ -3316,17 +3426,17 @@ out: #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower, + struct flow_cls_offload *cls_flower, int flags) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlx5e_configure_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return mlx5e_delete_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlx5e_stats_flower(priv->netdev, priv, cls_flower, flags); default: @@ -3347,36 +3457,22 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return -EOPNOTSUPP; } } - -static int mlx5e_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb, - priv); - return 0; - default: - return -EOPNOTSUPP; - } -} #endif +static LIST_HEAD(mlx5e_block_cb_list); + static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (type) { #ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_BLOCK: - return mlx5e_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &mlx5e_block_cb_list, + mlx5e_setup_tc_block_cb, + priv, priv, true); #endif case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(dev, type_data); @@ -3391,11 +3487,12 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + s->rx_packets += rq_stats->packets + xskrq_stats->packets; + s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -3494,6 +3591,13 @@ static int set_feature_lro(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); + if (enable && priv->xsk.refcnt) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n", + priv->xsk.refcnt); + err = -EINVAL; + goto out; + } + old_params = &priv->channels.params; if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { netdev_warn(netdev, "can't set LRO with legacy RQ\n"); @@ -3507,8 +3611,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) new_channels.params.lro_en = enable; if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) reset = false; } @@ -3698,6 
+3802,43 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, return features; } +static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct mlx5e_channels *chs, + struct mlx5e_params *new_params, + struct mlx5_core_dev *mdev) +{ + u16 ix; + + for (ix = 0; ix < chs->params.num_channels; ix++) { + struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; + + if (!umem) + continue; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + + /* Two criteria must be met: + * 1. HW MTU + all headrooms <= XSK frame size. + * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); + max_mtu = min(max_mtu_frame, max_mtu_page); + + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } + } + + return true; +} + int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb) { @@ -3718,18 +3859,31 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params.sw_mtu = new_mtu; if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(&new_channels.params)) { + !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, mlx5e_xdp_max_mtu(params)); + new_mtu, mlx5e_xdp_max_mtu(params, NULL)); + err = -EINVAL; + goto out; + } + + if (priv->xsk.refcnt && + !mlx5e_xsk_validate_mtu(netdev, &priv->channels, + &new_channels.params, priv->mdev)) { err = -EINVAL; goto out; } if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params); - u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); + bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_channels.params, + NULL); + u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL); + + /* If XSK is active, XSK RQs are linear. */ + is_linear |= priv->xsk.refcnt; + /* Always reset in linear mode - hw_mtu is used in data path. */ reset = reset && (is_linear || (ppw_old != ppw_new)); } @@ -4162,16 +4316,29 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) new_channels.params = priv->channels.params; new_channels.params.xdp_prog = prog; - if (!mlx5e_rx_is_linear_skb(&new_channels.params)) { + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. 
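mlx5e_xsk_validate_mtu above enforces the two commented criteria, and the limit it reports is the min of the frame-derived and page-derived maxima. The arithmetic in simplified form — the hard-header constant and the page bound here are stand-ins for MLX5E_HW2SW_MTU and mlx5e_xdp_max_mtu (which also accounts for XDP tailroom):

#include <stdint.h>

#define PAGE_SIZE_ 4096u
#define HARD_MTU   18u	/* placeholder for Ethernet header/FCS overhead */

/* Largest SW MTU that fits when the HW buffer has `room` bytes. */
static uint32_t hw2sw_mtu(uint32_t room)
{
	return room - HARD_MTU;
}

/* Max MTU for an XSK; both criteria from the driver comment must hold:
 * 1. HW MTU + all headrooms <= XSK chunk (frame) size.
 * 2. SKBs built on XDP_PASS must fit in a page.
 */
static uint32_t xsk_max_mtu(uint32_t chunk_size, uint32_t headroom)
{
	uint32_t max_mtu_frame = hw2sw_mtu(chunk_size - headroom);
	uint32_t max_mtu_page = hw2sw_mtu(PAGE_SIZE_ - headroom); /* simplified */

	return max_mtu_frame < max_mtu_page ? max_mtu_frame : max_mtu_page;
}

For a 4096-byte chunk with 256 bytes of headroom and this placeholder overhead, both bounds give 3822, matching the "MTU is too big" error path above.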
+ */ + if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", new_channels.params.sw_mtu, - mlx5e_xdp_max_mtu(&new_channels.params)); + mlx5e_xdp_max_mtu(&new_channels.params, NULL)); return -EINVAL; } return 0; } +static int mlx5e_xdp_update_state(struct mlx5e_priv *priv) +{ + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_open(priv); + else + mlx5e_xdp_set_closed(priv); + + return 0; +} + static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4192,8 +4359,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && reset) - mlx5e_close_locked(netdev); if (was_opened && !reset) { /* num_channels is invariant here, so we can take the * batched reference right upfront. @@ -4205,20 +4370,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) } } - /* exchange programs, extra prog reference we got from caller - * as long as we don't fail from this point onwards. - */ - old_prog = xchg(&priv->channels.params.xdp_prog, prog); + if (was_opened && reset) { + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + new_channels.params.xdp_prog = prog; + mlx5e_set_rq_type(priv->mdev, &new_channels.params); + old_prog = priv->channels.params.xdp_prog; + + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); + if (err) + goto unlock; + } else { + /* exchange programs, extra prog reference we got from caller + * as long as we don't fail from this point onwards. + */ + old_prog = xchg(&priv->channels.params.xdp_prog, prog); + } + if (old_prog) bpf_prog_put(old_prog); - if (reset) /* change RQ type according to priv->xdp_prog */ + if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */ mlx5e_set_rq_type(priv->mdev, &priv->channels.params); - if (was_opened && reset) - err = mlx5e_open_locked(netdev); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + if (!was_opened || reset) goto unlock; /* exchanging programs w/o reset, we update ref counts on behalf @@ -4226,19 +4402,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) */ for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); napi_synchronize(&c->napi); /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ old_prog = xchg(&c->rq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (xsk_open) { + old_prog = xchg(&c->xskrq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* napi_schedule in case we have missed anything */ napi_schedule(&c->napi); - - if (old_prog) - bpf_prog_put(old_prog); } unlock: @@ -4269,6 +4455,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_QUERY_PROG: xdp->prog_id = mlx5e_xdp_query(dev); return 0; + case XDP_SETUP_XSK_UMEM: + return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -4351,6 +4540,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = 
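The no-reset path above swaps the XDP program per RQ under a quiesced NAPI: clear the ENABLED bit (on the XSK RQ too, if open), napi_synchronize so no poller still dereferences the old program, xchg the pointer, drop the displaced reference, re-enable, and kick NAPI. The same ordering in a standalone sketch, with C11 atomics standing in for the kernel's bitops, RCU-like synchronize, and bpf_prog refcounting (all mocks):

#include <stdatomic.h>
#include <stddef.h>

struct prog { _Atomic int refcnt; };

struct rq {
	_Atomic _Bool enabled;		/* stands in for MLX5E_RQ_STATE_ENABLED */
	_Atomic(struct prog *) xdp_prog; /* read by the datapath under "NAPI" */
};

static void napi_synchronize_stub(void) { /* wait out in-flight pollers */ }

static void prog_put(struct prog *p)
{
	if (p)
		atomic_fetch_sub(&p->refcnt, 1);
}

/* Swap the XDP program on a live RQ without tearing the channel down. */
static void swap_xdp_prog(struct rq *rq, struct prog *new_prog)
{
	struct prog *old;

	atomic_store(&rq->enabled, 0);	/* stop posting/polling this RQ */
	napi_synchronize_stub();	/* no poller can hold the old prog now */

	old = atomic_exchange(&rq->xdp_prog, new_prog);
	prog_put(old);			/* release the displaced reference */

	atomic_store(&rq->enabled, 1);	/* resume; caller re-schedules NAPI */
}

The full-reset case instead goes through mlx5e_safe_switch_channels with mlx5e_xdp_update_state as the hw_modify callback, so the open/closed XDP bookkeeping stays consistent with the new channel set.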
mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, + .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -4420,9 +4610,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; @@ -4433,9 +4623,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) return moder; } -static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; @@ -4449,8 +4639,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) { return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -4502,11 +4692,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, * - Striding RQ configuration is not possible/supported. * - Slow PCI heuristic. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. */ if (!slow_pci_heuristic(mdev) && mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || - !mlx5e_rx_is_linear_skb(params))) + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(params, NULL))) MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); @@ -4528,6 +4720,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) @@ -4563,9 +4756,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* HW LRO */ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + /* No XSK params: checking the availability of striding RQ in general. 
*/ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) params->lro_en = !slow_pci_heuristic(mdev); + } params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ @@ -4584,13 +4779,16 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_build_rss_params(rss_params, params->num_channels); params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev); + + /* AF_XDP */ + params->xsk = xsk; } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(priv->mdev, netdev->dev_addr); if (is_zero_ether_addr(netdev->dev_addr) && !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { eth_hw_addr_random(netdev); @@ -4619,14 +4817,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->ethtool_ops = &mlx5e_ethtool_ops; netdev->vlan_features |= NETIF_F_SG; - netdev->vlan_features |= NETIF_F_IP_CSUM; - netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_HW_CSUM; netdev->vlan_features |= NETIF_F_GRO; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_RXCSUM; netdev->vlan_features |= NETIF_F_RXHASH; + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; @@ -4642,8 +4844,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { - netdev->hw_enc_features |= NETIF_F_IP_CSUM; - netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; @@ -4756,7 +4957,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); @@ -4798,7 +4999,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -4806,14 +5007,22 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_direct_tirs; + + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_xsk_rqts; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_direct_tirs; + goto err_destroy_xsk_tirs; } err = mlx5e_tc_nic_init(priv); @@ -4824,12 +5033,16 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_xsk_tirs: + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); +err_destroy_xsk_rqts: + mlx5e_destroy_direct_rqts(priv, 
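/* The unwind ladder here follows the usual LIFO convention: the xsk_tir
 * TIRs and RQTs created last are destroyed first, then the regular
 * direct TIRs/RQTs, the indirect ones, and finally the drop RQ. */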
priv->xsk_tir); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -4843,9 +5056,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -4927,6 +5142,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } +int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false); +} + static const struct mlx5e_profile mlx5e_nic_profile = { .init = mlx5e_nic_init, .cleanup = mlx5e_nic_cleanup, @@ -4936,6 +5156,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, + .update_rx = mlx5e_update_nic_rx, .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, @@ -4995,7 +5216,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, - nch); + nch * MLX5E_NUM_RQ_GROUPS); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; @@ -5133,7 +5354,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) #ifdef CONFIG_MLX5_ESWITCH if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { mlx5e_rep_register_vport_reps(mdev); return mdev; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2f406b161bcf..10ef90a7bddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -37,6 +37,7 @@ #include <net/act_api.h> #include <net/netevent.h> #include <net/arp.h> +#include <net/devlink.h> #include "eswitch.h" #include "en.h" @@ -128,7 +129,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -166,17 +167,6 @@ static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_uplink_rep_update_hw_counters(priv); - else - mlx5e_vf_rep_update_hw_counters(priv); -} - static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -203,7 
+193,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); mlx5e_rep_update_sw_counters(priv); - mlx5e_rep_update_hw_counters(priv); + priv->profile->update_stats(priv); mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) @@ -363,7 +353,7 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); } -static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -402,30 +392,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { static int mlx5e_rep_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid) { - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_upper = NULL; - struct mlx5e_priv *uplink_priv = NULL; - struct net_device *uplink_dev; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u64 parent_id; - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - if (uplink_dev) { - uplink_upper = netdev_master_upper_dev_get(uplink_dev); - uplink_priv = netdev_priv(uplink_dev); - } + priv = netdev_priv(dev); + esw = priv->mdev->priv.eswitch; - ppid->id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { - ether_addr_copy(ppid->id, uplink_upper->dev_addr); - } else { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + if (esw->mode == MLX5_ESWITCH_NONE) + return -EOPNOTSUPP; - ether_addr_copy(ppid->id, rep->hw_id); - } + parent_id = mlx5_query_nic_system_image_guid(priv->mdev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); return 0; } @@ -436,7 +415,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5e_rep_sq *rep_sq, *tmp; struct mlx5e_rep_priv *rpriv; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -457,7 +436,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, int err; int i; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -677,7 +656,7 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) static int mlx5e_rep_indr_offload(struct net_device *netdev, - struct tc_cls_flower_offload *flower, + struct flow_cls_offload *flower, struct mlx5e_rep_indr_block_priv *indr_priv) { struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); @@ -685,13 +664,13 @@ mlx5e_rep_indr_offload(struct net_device *netdev, int err = 0; switch (flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: err = mlx5e_configure_flower(netdev, priv, flower, flags); break; - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: err = mlx5e_delete_flower(netdev, priv, flower, flags); break; - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: err = mlx5e_stats_flower(netdev, priv, flower, flags); break; default: @@ -714,23 +693,39 @@ static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, } } +static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) +{ + struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +static 
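/* mlx5e_block_cb_list below is the driver-wide list the flow_block core
 * consults (via f->driver_block_list and flow_block_cb_is_busy()) to
 * refuse binding the same callback twice. The lifecycle, condensed from
 * the BIND/UNBIND arms that follow:
 *
 *   block_cb = flow_block_cb_alloc(f->net, cb, cb_ident, cb_priv, release);
 *   flow_block_cb_add(block_cb, f);
 *   list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
 *   ...
 *   block_cb = flow_block_cb_lookup(f, cb, cb_ident);
 *   flow_block_cb_remove(block_cb, f);
 *   list_del(&block_cb->driver_list);
 */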
LIST_HEAD(mlx5e_block_cb_list); + static int mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, struct mlx5e_rep_priv *rpriv, - struct tc_block_offload *f) + struct flow_block_offload *f) { struct mlx5e_rep_indr_block_priv *indr_priv; - int err = 0; + struct flow_block_cb *block_cb; - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + f->driver_block_list = &mlx5e_block_cb_list; + switch (f->command) { - case TC_BLOCK_BIND: + case FLOW_BLOCK_BIND: indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); if (indr_priv) return -EEXIST; + if (flow_block_cb_is_busy(mlx5e_rep_indr_setup_block_cb, + indr_priv, &mlx5e_block_cb_list)) + return -EBUSY; + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); if (!indr_priv) return -ENOMEM; @@ -740,26 +735,32 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - err = tcf_block_cb_register(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv, indr_priv, f->extack); - if (err) { + block_cb = flow_block_cb_alloc(f->net, + mlx5e_rep_indr_setup_block_cb, + indr_priv, indr_priv, + mlx5e_rep_indr_tc_block_unbind); + if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); + return PTR_ERR(block_cb); } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list); - return err; - case TC_BLOCK_UNBIND: + return 0; + case FLOW_BLOCK_UNBIND: indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); if (!indr_priv) return -ENOENT; - tcf_block_cb_unregister(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv); - list_del(&indr_priv->list); - kfree(indr_priv); + block_cb = flow_block_cb_lookup(f, + mlx5e_rep_indr_setup_block_cb, + indr_priv); + if (!block_cb) + return -ENOENT; + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; @@ -1101,7 +1102,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } -static int mlx5e_vf_rep_open(struct net_device *dev) +static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1124,7 +1125,7 @@ unlock: return err; } -static int mlx5e_vf_rep_close(struct net_device *dev) +static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1141,42 +1142,18 @@ static int mlx5e_vf_rep_close(struct net_device *dev) return ret; } -static int mlx5e_rep_get_phys_port_name(struct net_device *dev, - char *buf, size_t len) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - unsigned int fn; - int ret; - - fn = PCI_FUNC(priv->mdev->pdev->devfn); - if (fn >= MLX5_MAX_PORTS) - return -EOPNOTSUPP; - - if (rep->vport == MLX5_VPORT_UPLINK) - ret = snprintf(buf, len, "p%d", fn); - else - ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); - - if (ret >= len) - return -EOPNOTSUPP; - - return 0; -} - static int mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower, int flags) + struct flow_cls_offload *cls_flower, int flags) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlx5e_configure_flower(priv->netdev, priv, cls_flower, flags); - case 
TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return mlx5e_delete_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlx5e_stats_flower(priv->netdev, priv, cls_flower, flags); default: @@ -1198,32 +1175,16 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, } } -static int mlx5e_rep_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return mlx5e_rep_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, NULL, + mlx5e_rep_setup_tc_cb, + priv, priv, true); default: return -EOPNOTSUPP; } @@ -1276,7 +1237,7 @@ static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev } static void -mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1285,7 +1246,7 @@ mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } -static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) +static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) { return mlx5e_change_mtu(netdev, new_mtu, NULL); } @@ -1318,17 +1279,24 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { - .ndo_open = mlx5e_vf_rep_open, - .ndo_stop = mlx5e_vf_rep_close, +static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + return &rpriv->dl_port; +} + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_get_devlink_port = mlx5e_get_devlink_port, + .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_vf_rep_change_mtu, - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, + .ndo_change_mtu = mlx5e_rep_change_mtu, }; static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { @@ -1336,8 +1304,8 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_devlink_port = mlx5e_get_devlink_port, .ndo_get_stats64 = mlx5e_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -1350,13 +1318,12 
@@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, .ndo_set_features = mlx5e_set_features, }; bool mlx5e_eswitch_rep(struct net_device *netdev) { - if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) return true; @@ -1412,16 +1379,16 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ - mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(mdev, netdev->dev_addr); netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB if (MLX5_CAP_GEN(mdev, qos)) netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif } else { - netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + netdev->netdev_ops = &mlx5e_netdev_ops_rep; eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; } netdev->watchdog_timeo = 15 * HZ; @@ -1530,7 +1497,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -1538,7 +1505,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -1555,11 +1522,11 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, false); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -1573,9 +1540,9 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5_del_flow_rules(rpriv->vport_rx_rule); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, false); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@ -1642,11 +1609,16 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) } } -static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) +static void mlx5e_rep_enable(struct mlx5e_priv *priv) { mlx5e_set_netdev_mtu_boundaries(priv); } +static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) +{ + return 0; +} + static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); @@ -1714,15 +1686,16 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } -static const struct mlx5e_profile mlx5e_vf_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .cleanup = 
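/* Both representor profiles gain the new .update_rx hook: representors
 * have nothing to refresh, hence the mlx5e_update_rep_rx() stub above,
 * while the NIC profile's mlx5e_update_nic_rx() refreshes the TIRs. */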
mlx5e_cleanup_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, - .enable = mlx5e_vf_rep_enable, - .update_stats = mlx5e_vf_rep_update_hw_counters, + .enable = mlx5e_rep_enable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_rep_update_hw_counters, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, @@ -1737,6 +1710,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, + .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_uplink_rep_update_hw_counters, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, @@ -1744,6 +1718,55 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .max_tc = MLX5E_MAX_NUM_TC, }; +static bool +is_devlink_port_supported(const struct mlx5_core_dev *dev, + const struct mlx5e_rep_priv *rpriv) +{ + return rpriv->rep->vport == MLX5_VPORT_UPLINK || + rpriv->rep->vport == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); +} + +static int register_devlink_port(struct mlx5_core_dev *dev, + struct mlx5e_rep_priv *rpriv) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct netdev_phys_item_id ppid = {}; + int ret; + + if (!is_devlink_port_supported(dev, rpriv)) + return 0; + + ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); + if (ret) + return ret; + + if (rep->vport == MLX5_VPORT_UPLINK) + devlink_port_attrs_set(&rpriv->dl_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + PCI_FUNC(dev->pdev->devfn), false, 0, + &ppid.id[0], ppid.id_len); + else if (rep->vport == MLX5_VPORT_PF) + devlink_port_attrs_pci_pf_set(&rpriv->dl_port, + &ppid.id[0], ppid.id_len, + dev->pdev->devfn); + else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) + devlink_port_attrs_pci_vf_set(&rpriv->dl_port, + &ppid.id[0], ppid.id_len, + dev->pdev->devfn, + rep->vport - 1); + + return devlink_port_register(devlink, &rpriv->dl_port, rep->vport); +} + +static void unregister_devlink_port(struct mlx5_core_dev *dev, + struct mlx5e_rep_priv *rpriv) +{ + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_unregister(&rpriv->dl_port); +} + /* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) @@ -1761,7 +1784,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? 
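/* register_devlink_port() above maps e-switch vports onto devlink port
 * flavours: the uplink becomes a PHYSICAL port, the PF vport a PCI PF,
 * and VF vports PCI VFs (VF number rep->vport - 1, vport 0 being the
 * PF). The switch id (ppid) now comes from the NIC system image GUID
 * via mlx5e_rep_get_port_parent_id(), so all ports of one device report
 * the same parent id. */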
+ &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1771,7 +1795,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rep->rep_if[REP_ETH].priv = rpriv; + rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -1794,15 +1818,27 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } + err = register_devlink_port(dev, rpriv); + if (err) { + esw_warn(dev, "Failed to register devlink port %d\n", + rep->vport); + goto err_neigh_cleanup; + } + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_neigh_cleanup; + goto err_devlink_cleanup; } + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_type_eth_set(&rpriv->dl_port, netdev); return 0; +err_devlink_cleanup: + unregister_devlink_port(dev, rpriv); + err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1825,9 +1861,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; void *ppriv = priv->ppriv; + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_type_clear(&rpriv->dl_port); unregister_netdev(netdev); + unregister_devlink_port(dev, rpriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); if (rep->vport == MLX5_VPORT_UPLINK) @@ -1845,16 +1885,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev +}; + void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 83b573b1abac..c56e6ee4350c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -86,12 +86,13 @@ struct mlx5e_rep_priv { struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct devlink_port dl_port; }; static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) { - return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv; + return rep->rep_data[REP_ETH].priv; } struct mlx5e_neigh { @@ -150,13 +151,12 @@ struct mlx5e_encap_entry { struct hlist_node encap_hlist; struct list_head flows; u32 encap_id; - struct ip_tunnel_info tun_info; + const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; struct net_device *route_dev; - int tunnel_type; - int tunnel_hlen; + struct mlx5e_tc_tunnel *tunnel; int 
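/* Ordering in the load/unload paths above: the devlink port is
 * registered before register_netdev(), its type is set to Ethernet only
 * once the netdev exists, and unload reverses this (clear the type,
 * unregister the netdev, then unregister the devlink port). */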
reformat_type; u8 flags; char *encap_header; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 13133e7f088e..56a2f4666c47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -34,6 +34,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/indirect_call_wrapper.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> @@ -46,6 +47,7 @@ #include "en_accel/tls_rxtx.h" #include "lib/clock.h" #include "en/xdp.h" +#include "en/xsk/rx.h" static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -234,8 +236,8 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, return true; } -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { if (mlx5e_rx_cache_get(rq, dma_info)) return 0; @@ -247,7 +249,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, PAGE_SIZE, rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(dma_info->page); + page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; return -ENOMEM; } @@ -255,13 +257,23 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } +static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + if (rq->umem) + return mlx5e_xsk_page_alloc_umem(rq, dma_info); + else + return mlx5e_page_alloc_pool(rq, dma_info); +} + void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); } -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) { if (likely(recycle)) { if (mlx5e_rx_cache_put(rq, dma_info)) @@ -271,10 +283,25 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, page_pool_recycle_direct(rq->page_pool, dma_info->page); } else { mlx5e_page_dma_unmap(rq, dma_info); + page_pool_release_page(rq->page_pool, dma_info->page); put_page(dma_info->page); } } +static inline void mlx5e_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (rq->umem) + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. + */ + mlx5e_xsk_page_release(rq, dma_info); + else + mlx5e_page_release_dynamic(rq, dma_info, recycle); +} + static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { @@ -286,7 +313,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. 
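* (mlx5e_page_alloc() above picks the allocator at run time: UMEM-backed
* frames via mlx5e_xsk_page_alloc_umem() when rq->umem is set, page-pool
* pages via mlx5e_page_alloc_pool() otherwise.)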
*/ - err = mlx5e_page_alloc_mapped(rq, frag->di); + err = mlx5e_page_alloc(rq, frag->di); return err; } @@ -352,6 +379,13 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) int err; int i; + if (rq->umem) { + int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; + + if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired))) + return -ENOMEM; + } + for (i = 0; i < wqe_bulk; i++) { struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i); @@ -399,11 +433,17 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { - const bool no_xdp_xmit = - bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + bool no_xdp_xmit; struct mlx5e_dma_info *dma_info = wi->umr.dma_info; int i; + /* A common case for AF_XDP. */ + if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE)) + return; + + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, + MLX5_MPWRQ_PAGES_PER_WQE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) mlx5e_page_release(rq, &dma_info[i], recycle); @@ -425,11 +465,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) mlx5_wq_ll_update_db_record(wq); } -static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) -{ - return mlx5_wq_cyc_get_ctr_wrap_cnt(&sq->wq, sq->pc); -} - static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, u16 pi, u16 nnops) @@ -457,6 +492,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) int err; int i; + if (rq->umem && + unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) { + err = -ENOMEM; + goto err; + } + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { @@ -465,12 +506,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); - if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2)) - memcpy(umr_wqe, &rq->mpwqe.umr_wqe, - offsetof(struct mlx5e_umr_wqe, inline_mtts)); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { - err = mlx5e_page_alloc_mapped(rq, dma_info); + err = mlx5e_page_alloc(rq, dma_info); if (unlikely(err)) goto err_unmap; umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); @@ -485,6 +524,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].umr.rq = rq; sq->pc += MLX5E_UMR_WQEBBS; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -496,6 +536,8 @@ err_unmap: dma_info--; mlx5e_page_release(rq, dma_info, true); } + +err: rq->stats->buff_alloc_err++; return err; @@ -542,11 +584,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !!err; } -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); struct mlx5_cqe64 *cqe; - u8 completed_umr = 0; u16 sqcc; int i; @@ -587,7 +628,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) if (likely(wi->opcode == MLX5_OPCODE_UMR)) { sqcc += MLX5E_UMR_WQEBBS; - completed_umr++; + wi->umr.rq->mpwqe.umr_completed++; } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) { sqcc++; } else { @@ -603,24 
+644,25 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sq->cc = sqcc; mlx5_cqwq_update_db_record(&cq->wq); - - if (likely(completed_umr)) { - mlx5e_post_rx_mpwqe(rq, completed_umr); - rq->mpwqe.umr_in_progress -= completed_umr; - } } bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) { struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + u8 umr_completed = rq->mpwqe.umr_completed; + int alloc_err = 0; u8 missing, i; u16 head; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; - mlx5e_poll_ico_cq(&sq->cq, rq); + if (umr_completed) { + mlx5e_post_rx_mpwqe(rq, umr_completed); + rq->mpwqe.umr_in_progress -= umr_completed; + rq->mpwqe.umr_completed = 0; + } missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress; @@ -634,7 +676,9 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) head = rq->mpwqe.actual_wq_head; i = missing; do { - if (unlikely(mlx5e_alloc_rx_mpwqe(rq, head))) + alloc_err = mlx5e_alloc_rx_mpwqe(rq, head); + + if (unlikely(alloc_err)) break; head = mlx5_wq_ll_get_wqe_next_ix(wq, head); } while (--i); @@ -648,6 +692,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; + /* If XSK Fill Ring doesn't have enough frames, busy poll by + * rescheduling the NAPI poll. + */ + if (unlikely(alloc_err == -ENOMEM && rq->umem)) + return true; + return false; } @@ -1016,7 +1066,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@ -1092,7 +1142,10 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1230,7 +1283,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prefetch(data); rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false); rcu_read_unlock(); if (consumed) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) @@ -1279,8 +1332,10 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset, - page_idx); + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); if (!skb) goto mpwrq_cqe_out; @@ -1327,7 +1382,8 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(cqwq); - rq->handle_rx_cqe(rq, cqe); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: @@ -1437,7 +1493,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = 
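/* INDIRECT_CALL_2() comes from <linux/indirect_call_wrapper.h>: it
 * compares the function pointer against the two named candidates and
 * calls the match directly, sparing the common case a (retpolined)
 * indirect call; roughly:
 *
 *   likely(f == f2) ? f2(args) :
 *   likely(f == f1) ? f1(args) : f(args)
 */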
INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) goto wq_free_wqe; @@ -1469,7 +1528,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (unlikely(!skb)) { /* a DROP, save the page-reuse checks */ mlx5e_free_rx_wqe(rq, wi, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 4382ef85488c..840ec945ccba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -64,7 +64,7 @@ static int mlx5e_test_health_info(struct mlx5e_priv *priv) { struct mlx5_core_health *health = &priv->mdev->priv.health; - return health->sick ? 1 : 0; + return health->fatal_error ? 1 : 0; } static int mlx5e_test_link_state(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 483d321d2151..539b4d3656da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -48,8 +48,15 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, #ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, #endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, @@ -104,7 +111,33 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) }, + { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) @@ -144,6 +177,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) &priv->channel_stats[i]; struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq; struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq; + struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; struct mlx5e_ch_stats *ch_stats = &channel_stats->ch; int j; @@ -186,6 +221,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; s->ch_aff_change += ch_stats->aff_change; + s->ch_force_irq += ch_stats->force_irq; s->ch_eq_rearm += ch_stats->eq_rearm; /* xdp redirect */ s->tx_xdp_xmit += xdpsq_red_stats->xmit; @@ -194,6 +230,32 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; + /* AF_XDP zero-copy */ + s->rx_xsk_packets += xskrq_stats->packets; + s->rx_xsk_bytes += xskrq_stats->bytes; + s->rx_xsk_csum_complete += xskrq_stats->csum_complete; + s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary; + s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner; + s->rx_xsk_csum_none += xskrq_stats->csum_none; + s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark; + s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets; + s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop; + s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect; + s->rx_xsk_wqe_err += xskrq_stats->wqe_err; + s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes; + s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides; + s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop; + s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err; + s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; + s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; + s->rx_xsk_congst_umr += xskrq_stats->congst_umr; + s->rx_xsk_arfs_err += xskrq_stats->arfs_err; + s->tx_xsk_xmit += xsksq_stats->xmit; + s->tx_xsk_mpwqe += xsksq_stats->mpwqe; + s->tx_xsk_inlnw += xsksq_stats->inlnw; + s->tx_xsk_full += xsksq_stats->full; + s->tx_xsk_err += xsksq_stats->err; + s->tx_xsk_cqes += xsksq_stats->cqes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -216,8 +278,15 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_csum_none += sq_stats->csum_none; s->tx_csum_partial += sq_stats->csum_partial; #ifdef CONFIG_MLX5_EN_TLS - s->tx_tls_ooo += sq_stats->tls_ooo; 
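/* The aggregation pattern is uniform: each per-channel counter block
 * (rq, xdpsq, and now xskrq/xsksq) is summed field by field into the
 * single mlx5e_sw_stats aggregate, and the expanded TLS TX counters
 * below stay under the same CONFIG_MLX5_EN_TLS guard as their per-SQ
 * definitions. */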
- s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; + s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; + s->tx_tls_ctx += sq_stats->tls_ctx; + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; + s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; #endif s->tx_cqes += sq_stats->cqes; } @@ -1238,6 +1307,16 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, +#endif { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, @@ -1266,11 +1345,43 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, }; +static const struct counter_desc xskrq_stats_desc[] = { + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) }, +}; + +static const struct counter_desc xsksq_stats_desc[] = { + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + 
{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + static const struct counter_desc ch_stats_desc[] = { { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, }; @@ -1278,6 +1389,8 @@ static const struct counter_desc ch_stats_desc[] = { #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) #define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc) +#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) #define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) @@ -1288,13 +1401,16 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) (NUM_CH_STATS * max_nch) + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + (NUM_RQ_XDPSQ_STATS * max_nch) + - (NUM_XDPSQ_STATS * max_nch); + (NUM_XDPSQ_STATS * max_nch) + + (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) + + (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); } static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1306,6 +1422,9 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xskrq_stats_desc[j].format, i); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_xdpsq_stats_desc[j].format, i); @@ -1318,10 +1437,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, sq_stats_desc[j].format, priv->channel_tc2txq[i][tc]); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xsksq_stats_desc[j].format, i); for (j = 0; j < NUM_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, xdpsq_stats_desc[j].format, i); + } return idx; } @@ -1330,6 +1453,7 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1343,6 +1467,10 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, rq_stats_desc, j); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, @@ -1356,11 +1484,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], sq_stats_desc, j); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + xsksq_stats_desc, j); for (j = 0; j < 
NUM_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, xdpsq_stats_desc, j); + } return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index cdddcc46971b..76ac111e14d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -46,6 +46,8 @@ #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) struct counter_desc { @@ -116,12 +118,46 @@ struct mlx5e_sw_stats { u64 ch_poll; u64 ch_arm; u64 ch_aff_change; + u64 ch_force_irq; u64 ch_eq_rearm; #ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_encrypted_packets; + u64 tx_tls_encrypted_bytes; + u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_resync_bytes; + u64 tx_tls_drop_no_sync_data; + u64 tx_tls_drop_bypass_req; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; #endif + + u64 rx_xsk_packets; + u64 rx_xsk_bytes; + u64 rx_xsk_csum_complete; + u64 rx_xsk_csum_unnecessary; + u64 rx_xsk_csum_unnecessary_inner; + u64 rx_xsk_csum_none; + u64 rx_xsk_ecn_mark; + u64 rx_xsk_removed_vlan_packets; + u64 rx_xsk_xdp_drop; + u64 rx_xsk_xdp_redirect; + u64 rx_xsk_wqe_err; + u64 rx_xsk_mpwqe_filler_cqes; + u64 rx_xsk_mpwqe_filler_strides; + u64 rx_xsk_oversize_pkts_sw_drop; + u64 rx_xsk_buff_alloc_err; + u64 rx_xsk_cqe_compress_blks; + u64 rx_xsk_cqe_compress_pkts; + u64 rx_xsk_congst_umr; + u64 rx_xsk_arfs_err; + u64 tx_xsk_xmit; + u64 tx_xsk_mpwqe; + u64 tx_xsk_inlnw; + u64 tx_xsk_full; + u64 tx_xsk_err; + u64 tx_xsk_cqes; }; struct mlx5e_qcounter_stats { @@ -227,8 +263,15 @@ struct mlx5e_sq_stats { u64 added_vlan_packets; u64 nop; #ifdef CONFIG_MLX5_EN_TLS + u64 tls_encrypted_packets; + u64 tls_encrypted_bytes; + u64 tls_ctx; u64 tls_ooo; u64 tls_resync_bytes; + u64 tls_drop_no_sync_data; + u64 tls_drop_bypass_req; + u64 tls_dump_packets; + u64 tls_dump_bytes; #endif /* less likely accessed in data path */ u64 csum_none; @@ -256,6 +299,7 @@ struct mlx5e_ch_stats { u64 poll; u64 arm; u64 aff_change; + u64 force_irq; u64 eq_rearm; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e40c60d1631f..2d6436257f9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -53,6 +53,7 @@ #include "en/port.h" #include "en/tc_tun.h" #include "lib/devcom.h" +#include "lib/geneve.h" struct mlx5_nic_flow_attr { u32 action; @@ -126,7 +127,7 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; @@ -716,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act 
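/* The MLX5E_DECLARE_XSK*_STAT formats above yield per-channel ethtool
 * strings such as "rx3_xsk_packets" and "tx3_xsk_xmit"; the
 * (NUM_XSK*_STATS * is_xsk) loop bounds collapse to zero iterations
 * until an AF_XDP socket has ever been used (priv->xsk.ever_used), so
 * the stats list is unchanged on setups without XSK. */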
flow_act = { .action = attr->action, - .flow_tag = attr->flow_tag, .reformat_id = 0, - .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, + .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; bool table_created = false; int err, dest_ix = 0; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = attr->flow_tag; + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { @@ -799,7 +803,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->match_level != MLX5_MATCH_NONE) - parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); @@ -1063,6 +1067,19 @@ err_max_prio_chain: return err; } +static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec; + void *headers_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters_3); + u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, + headers_v, + geneve_tlv_option_0_data); + + return !!geneve_tlv_opt_0_data; +} + static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -1084,6 +1101,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } + if (mlx5_flow_has_geneve_opt(flow)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); + mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) @@ -1330,7 +1350,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; @@ -1338,8 +1358,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - struct flow_match_control enc_control; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); int err; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, @@ -1350,9 +1369,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow_rule_match_enc_control(rule, &enc_control); - - if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; flow_rule_match_enc_ipv4_addrs(rule, &match); @@ -1372,7 +1389,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; flow_rule_match_enc_ipv6_addrs(rule, &match); @@ -1461,7 +1478,7 @@ static void *get_match_headers_value(u32 flags, static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev, u8 *match_level, u8 *tunnel_match_level) { @@ -1474,7 +1491,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, 
misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; @@ -1497,29 +1514,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", dissector->used_keys); return -EOPNOTSUPP; } - if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - - flow_rule_match_enc_control(rule, &match); - switch (match.key->addr_type) { - case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) - return -EOPNOTSUPP; - break; - default: + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; - } /* In decap flow, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr @@ -1822,7 +1831,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; @@ -2581,21 +2590,21 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } struct encap_key { - struct ip_tunnel_key *ip_tun_key; - int tunnel_type; + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; }; static inline int cmp_encap_info(struct encap_key *a, struct encap_key *b) { return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tunnel_type != b->tunnel_type; + a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), - key->tunnel_type); + key->tc_tunnel->tunnel_type); } @@ -2625,7 +2634,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct ip_tunnel_info *tun_info; + const struct ip_tunnel_info *tun_info; struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; @@ -2634,17 +2643,17 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, int err = 0; parse_attr = attr->parse_attr; - tun_info = &parse_attr->tun_info[out_index]; + tun_info = parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; - key.tunnel_type = 
mlx5e_tc_tun_get_type(mirred_dev); + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info.key; - e_key.tunnel_type = e->tunnel_type; + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; if (!cmp_encap_info(&e_key, &key)) { found = true; break; @@ -2659,7 +2668,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, if (!e) return -ENOMEM; - e->tun_info = *tun_info; + e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err; @@ -2793,6 +2802,16 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, return err; } +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + if (is_merged_eswitch_dev(priv, out_dev)) + return true; + + return mlx5e_eswitch_rep(out_dev) && + same_hw_devs(priv, netdev_priv(out_dev)); +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -2858,9 +2877,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (netdev_port_same_parent_id(priv->netdev, - out_dev) || - is_merged_eswitch_dev(priv, out_dev)) { + if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); @@ -2877,6 +2894,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (err) return err; } + if (is_vlan_dev(parse_attr->filter_dev)) { err = add_vlan_pop_action(priv, attr, &action); @@ -2884,8 +2902,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } - if (!mlx5e_eswitch_rep(out_dev)) + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); return -EOPNOTSUPP; + } out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; @@ -2895,7 +2918,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (encap) { parse_attr->mirred_ifindex[attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = *info; + parse_attr->tun_info[attr->out_count] = info; encap = false; attr->dests[attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; @@ -3092,7 +3115,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct tc_cls_flower_offload *f, u16 flow_flags, + struct flow_cls_offload *f, u16 flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { @@ -3126,7 +3149,7 @@ static void mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) { @@ -3148,13 +3171,13 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct 
mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3198,7 +3221,7 @@ out: return ERR_PTR(err); } -static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, +static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, u16 flow_flags) { @@ -3250,7 +3273,7 @@ out: static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) @@ -3284,12 +3307,12 @@ out: static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3335,7 +3358,7 @@ out: static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, int flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) @@ -3349,7 +3372,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, if (!tc_can_offload_extack(priv->netdev, f->common.extack)) return -EOPNOTSUPP; - if (esw && esw->mode == SRIOV_OFFLOADS) + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) err = mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, flow); else @@ -3360,7 +3383,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, } int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@ -3407,7 +3430,7 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) } int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; @@ -3426,7 +3449,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, } int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f62e81902d27..3ab39275ca7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -54,12 +54,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); int mlx5e_stats_flower(struct net_device *dev, 
struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 701e5dc75bb0..600e92cb629a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -35,55 +35,12 @@ #include <net/geneve.h> #include <net/dsfield.h> #include "en.h" +#include "en/txrx.h" #include "ipoib/ipoib.h" #include "en_accel/en_accel.h" +#include "en_accel/ktls.h" #include "lib/clock.h" -#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS - -#ifndef CONFIG_MLX5_EN_TLS -#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#else -/* TLS offload requires MLX5E_SQ_STOP_ROOM to have - * enough room for a resync SKB, a normal SKB and a NOP - */ -#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#endif - -static inline void mlx5e_tx_dma_unmap(struct device *pdev, - struct mlx5e_sq_dma *dma) -{ - switch (dma->type) { - case MLX5E_DMA_MAP_SINGLE: - dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - case MLX5E_DMA_MAP_PAGE: - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - default: - WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); - } -} - -static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) -{ - return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; -} - -static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, - dma_addr_t addr, - u32 size, - enum mlx5e_dma_map_type map_type) -{ - struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); - - dma->addr = addr; - dma->size = size; - dma->type = map_type; -} - static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { int i; @@ -277,23 +234,6 @@ dma_unmap_wqe_err: return -ENOMEM; } -static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, - struct mlx5_wq_cyc *wq, - u16 pi, u16 nnops) -{ - struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - - edge_wi = wi + nnops; - - /* fill sq frag edge with nops to avoid wqe wrapping two pages */ - for (; wi < edge_wi; wi++) { - wi->skb = NULL; - wi->num_wqebbs = 1; - mlx5e_post_nop(wq, sq->sqn, &sq->pc); - } - sq->stats->nop += nnops; -} - static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, @@ -301,6 +241,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; + bool send_doorbell; wi->num_bytes = num_bytes; wi->num_dma = num_dma; @@ -310,23 +251,21 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - netdev_tx_sent_queue(sq->txq, num_bytes); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; sq->pc += 
wi->num_wqebbs; - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) { netif_tx_stop_queue(sq->txq); sq->stats->stopped++; } - if (!xmit_more || netif_xmit_stopped(sq->txq)) + send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes, + xmit_more); + if (send_doorbell) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); } -#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) - netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) { @@ -353,9 +292,12 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { + u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ? + MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode; + opcode = MLX5_OPCODE_SEND; mss = 0; - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + ihs = mlx5e_calc_min_inline(mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); stats->packets++; } @@ -380,11 +322,17 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5_wqe_eth_seg cur_eth = wqe->eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); #ifdef CONFIG_MLX5_EN_IPSEC wqe->eth = cur_eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + wqe->ctrl = cur_ctrl; +#endif } /* fill wqe */ @@ -443,7 +391,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); /* might send skbs and update wqe and pi */ skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); @@ -531,8 +479,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->db.wqe_info[ci]; skb = wi->skb; - if (unlikely(!skb)) { /* nop */ - sqcc++; + if (unlikely(!skb)) { +#ifdef CONFIG_MLX5_EN_TLS + if (wi->resync_dump_frag) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, dma_fifo_cc++); + + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma); + } +#endif + sqcc += wi->num_wqebbs; continue; } @@ -574,8 +530,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5E_SQ_STOP_ROOM) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); stats->wake++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index f9862bf75491..c50b6f0769c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { @@ -48,26 +49,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) { struct mlx5e_sq_stats *stats = sq->stats; - struct net_dim_sample dim_sample; + struct dim_sample 
dim_sample; if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) return; - net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&sq->dim, dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) { struct mlx5e_rq_stats *stats = rq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) return; - net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&rq->dim, dim_sample); } @@ -87,7 +86,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + bool aff_change = false; + bool busy_xsk = false; bool busy = false; int work_done = 0; int i; @@ -97,22 +101,38 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + busy |= work_done == budget; } - busy |= c->rq.post_wqes(rq); + mlx5e_poll_ico_cq(&c->icosq.cq); + + busy |= rq->post_wqes(rq); + if (xsk_open) { + mlx5e_poll_ico_cq(&c->xskicosq.cq); + busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + busy_xsk |= xskrq->post_wqes(xskrq); + } + + busy |= busy_xsk; if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) return budget; ch_stats->aff_change++; + aff_change = true; if (budget && work_done == budget) work_done--; } @@ -133,10 +153,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); + if (xsk_open) { + mlx5e_handle_rx_dim(xskrq); + mlx5e_cq_arm(&c->xskicosq.cq); + mlx5e_cq_arm(&xsksq->cq); + mlx5e_cq_arm(&xskrq->cq); + } + + if (unlikely(aff_change && busy_xsk)) { + mlx5e_trigger_irq(&c->icosq); + ch_stats->force_irq++; + } + return work_done; } -void mlx5e_completion_event(struct mlx5_core_cq *mcq) +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 23883d1fa22f..41f25ea2e8d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,17 +61,21 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -struct mlx5_irq_info { - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. 
MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. + */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, }; +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -79,11 +83,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; u32 cqn = -1; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@ -144,33 +153,23 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) cq = mlx5_eq_cq_get(eq, cqn); if (likely(cq)) { ++cq->arm_sn; - cq->comp(cq); + cq->comp(cq, eqe); mlx5_cq_put(cq); } else { mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); } ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. 
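Both reworked handlers (mlx5_eq_comp_int above and mlx5_eq_async_int here) now share one drain pattern: consume at most MLX5_EQ_POLLING_BUDGET entries per invocation and publish the consumer index once, armed, on the way out. Because the EQ is sized with MLX5_NUM_SPARE_EQE extra entries, any budget bounded by the spare count guarantees the device sees an updated ci before it can wrap the queue, which is what lets the old intermediate eq_update_ci(eq, 0) calls go away. A minimal sketch of the pattern, with peek_eqe(), handle_eqe() and publish_ci() standing in for the driver's helpers:

        /* Drain at most `budget` owned entries, then write the consumer index
         * back exactly once; `budget` must stay below the queue's spare entries.
         * peek_eqe()/handle_eqe()/publish_ci() are hypothetical helpers.
         */
        static void drain_eq(struct my_eq *eq, int budget)
        {
                struct my_eqe *eqe = peek_eqe(eq);      /* NULL if no owned entry */
                int polled = 0;

                if (!eqe)
                        goto out;

                do {
                        dma_rmb();      /* read the entry only after the ownership check */
                        handle_eqe(eq, eqe);
                        ++eq->cons_index;
                } while (++polled < budget && (eqe = peek_eqe(eq)));

        out:
                publish_ci(eq, true);   /* update ci and re-arm the EQ */
        }
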
@@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -248,22 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; - - if (eq_table->irq_info[vecidx].context) - return -EEXIST; + int i; /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); @@ -291,10 +284,12 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - if (!param->mask && MLX5_CAP_GEN(dev, log_max_uctx)) + if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID); - MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i, + param->mask[i]); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -307,34 +302,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; - eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto err_irq; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_irq: - free_irq(eq->irqn, eq); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -346,18 +326,48 @@ err_buf: return err; } -static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. 
+ */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); - mlx5_debug_eq_remove(dev, eq); + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Disable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. + */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; - mlx5_debug_eq_remove(dev, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) @@ -382,7 +392,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return err; } -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) { struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; @@ -392,16 +402,14 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) spin_unlock(&table->lock); if (!tmp) { - mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); - return -ENOENT; - } - - if (tmp != cq) { - mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn); - return -EINVAL; + mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", + eq->eqn, cq->cqn); + return; } - return 0; + if (tmp != cq) + mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", + eq->eqn, cq->cqn); } int mlx5_eq_table_init(struct mlx5_core_dev *dev) @@ -423,6 +431,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -439,19 +448,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -480,7 +490,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -493,14 +503,31 @@ static int cq_err_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } - cq->event(cq, type); + if (cq->event) + cq->event(cq, type); mlx5_cq_put(cq); return NOTIFY_OK; }
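With creation and IRQ wiring now split, a generic consumer (the RDMA ODP EQ is the stated user further down) brackets an EQ's lifetime as create, enable, disable, destroy, owning the notifier block itself. A sketch of that sequence under stated assumptions: the handler, the .nent value and mlx5_eq_destroy_generic() as the teardown counterpart are illustrative, not taken from this hunk:

        /* Hypothetical consumer of the generic EQ API. */
        static int my_eqe_handler(struct notifier_block *nb,
                                  unsigned long action, void *data)
        {
                /* data points at the polled struct mlx5_eqe; runs in IRQ context */
                return 0;
        }

        static struct notifier_block my_nb = { .notifier_call = my_eqe_handler };

        static struct mlx5_eq *my_eq_open(struct mlx5_core_dev *dev)
        {
                struct mlx5_eq_param param = {
                        .irq_index = 0, /* async EQs must share IRQ index 0 */
                        .nent = 64,     /* illustrative depth */
                };
                struct mlx5_eq *eq;
                int err;

                eq = mlx5_eq_create_generic(dev, &param);       /* create in device */
                if (IS_ERR(eq))
                        return eq;

                err = mlx5_eq_enable(dev, eq, &my_nb);  /* attach nb to IRQ, arm EQ */
                if (err) {
                        mlx5_eq_destroy_generic(dev, eq);
                        return ERR_PTR(err);
                }
                return eq;
        }

        /* Teardown mirrors it: mlx5_eq_disable(dev, eq, &my_nb), then
         * mlx5_eq_destroy_generic(dev, eq).
         */

-static u64 gather_async_events_mask(struct mlx5_core_dev *dev) +static void gather_user_async_events(struct 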
mlx5_core_dev *dev, u64 mask[4]) +{ + __be64 *user_unaffiliated_events; + __be64 *user_affiliated_events; + int i; + + user_affiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_affiliated_events); + user_unaffiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events); + + for (i = 0; i < 4; i++) + mask[i] |= be64_to_cpu(user_affiliated_events[i] | + user_unaffiliated_events[i]); +} + +static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) { u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; @@ -533,10 +560,14 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); - if (mlx5_core_is_ecpf_esw_manager(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + if (mlx5_eswitch_is_funcs_handler(dev)) + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); - return async_event_mask; + mask[0] = async_event_mask; + + if (MLX5_CAP_GEN(dev, event_cap)) + gather_user_async_events(dev, mask); } static int create_async_eqs(struct mlx5_core_dev *dev) @@ -548,55 +579,76 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, - .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; + err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, - .mask = gather_async_events_mask(dev), + .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); + + gather_async_events_mask(dev, param.mask); + err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, - .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, - .context = &table->pages_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, ¶m); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; + err = create_async_eq(dev, &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; +err5: + destroy_async_eq(dev, 
&table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3: + destroy_async_eq(dev, &table->async_eq.core); err2: - destroy_async_eq(dev, &table->async_eq); - -err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -607,19 +659,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -629,24 +684,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API for mlx5_core consumers * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -655,7 +710,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -713,84 +768,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -/* Completion EQs */ - -static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - irq_info->mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct 
mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); -} - -static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { - err = set_comp_irq_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - clear_comp_irq_affinity_hint(mdev, i); - - return err; -} - -static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) - clear_comp_irq_affinity_hint(mdev, i); -} - static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -802,23 +787,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + for (i = 0; i < ncomp_eqs; i++) { + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -833,33 +812,28 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx); -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, - .mask = 0, + .irq_index = vecidx, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int }; - err = create_map_eq(dev, &eq->core, name, ¶m); + err = create_map_eq(dev, &eq->core, ¶m); + if (err) { + kfree(eq); + goto clean; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); if (err) { + destroy_unmap_eq(dev, &eq->core); kfree(eq); goto clean; } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); } - err = set_comp_irq_affinity_hints(dev); - if (err) { - mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); - goto clean; - } - return 0; clean: @@ -890,22 +864,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return 
dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; + + return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, + vecidx); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->rmap; + return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); } #endif @@ -926,82 +902,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i, max_eqs; - - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; - for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; - } + mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); - pci_free_irq_vectors(dev->pdev); -} - -static int alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, - nvec, PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(table->irq_info); - return err; -} - -static void free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - err = alloc_irq_vectors(dev); - if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); - return err; - } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); err = create_async_eqs(dev); if (err) { @@ -1019,7 +932,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err_comp_eqs: destroy_async_eqs(dev); err_async_eqs: - free_irq_vectors(dev); return err; } @@ -1027,7 +939,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { destroy_comp_eqs(dev); destroy_async_eqs(dev); - free_irq_vectors(dev); } int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) @@ -1039,6 +950,7 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); } +EXPORT_SYMBOL(mlx5_eq_notifier_register); int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { @@ -1049,3 +961,4 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } 
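The register/unregister pair exported here opens the per-event-type atomic notifier chains, which both async EQ handlers dispatch into, to other mlx5 consumers; the eswitch code in the next file uses exactly this with NIC_VPORT_CHANGE and ESW_FUNCTIONS_CHANGED. A minimal sketch of a subscriber, with the handler name and the PORT_CHANGE event chosen purely for illustration:

        /* Illustrative subscriber to one EQE type via struct mlx5_nb. */
        static int my_port_event(struct notifier_block *nb,
                                 unsigned long type, void *data)
        {
                /* data is the raw struct mlx5_eqe; called from the EQ's chain */
                return NOTIFY_OK;
        }

        static int my_subscribe(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
        {
                MLX5_NB_INIT(nb, my_port_event, PORT_CHANGE);
                return mlx5_eq_notifier_register(dev, nb);
        }

        /* and on teardown: mlx5_eq_notifier_unregister(dev, nb); */
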
+EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6a921e24cd5e..7281f8d6cba6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen) +{ + return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); +} + +static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen) +{ + return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); +} + static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { @@ -473,7 +497,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ - if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY) + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", @@ -873,7 +897,7 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport) struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; - mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); @@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; + + esw_vport_del_ingress_acl_modify_metadata(esw, vport); } void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, @@ -1527,6 +1553,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u16 vport_num = vport->vport; + int flags; if (esw->manager_vport == vport_num) return; @@ -1544,11 +1571,13 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, 
vport->info.node_guid); } + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, - (vport->info.vlan || vport->info.qos)); + flags); /* Only legacy mode needs ACLs */ - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw_vport_ingress_config(esw, vport); esw_vport_egress_config(esw, vport); } @@ -1600,7 +1629,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); /* Create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == SRIOV_LEGACY) + if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY) esw_vport_create_drop_counters(vport); /* Restore old vport configuration */ @@ -1654,7 +1683,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, vport->enabled_events = 0; esw_vport_disable_qos(esw, vport); if (esw->manager_vport != vport_num && - esw->mode == SRIOV_LEGACY) { + esw->mode == MLX5_ESWITCH_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, @@ -1686,54 +1715,91 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } +/** + * mlx5_esw_query_functions - Returns raw output about functions state + * @dev: Pointer to device to query + * + * mlx5_esw_query_functions() allocates and returns functions changed + * raw output memory pointer from device on success. Otherwise returns ERR_PTR. + * Caller must free the memory using kvfree() when valid pointer is returned. + */ +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return ERR_PTR(-ENOMEM); + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + if (!err) + return out; + + kvfree(out); + return ERR_PTR(err); +} + +static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +{ + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { + MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } +} + +static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + + flush_workqueue(esw->work_queue); +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { - int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; int err; int i, enabled_events; if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { - esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch ingress ACL is 
not supported by FW\n"); if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); - - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); - - if (mode == SRIOV_OFFLOADS) { - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); - if (err) - return err; - total_nvports = esw->total_vports; - } else { - vf_nvports = nvfs; - total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); - } - } + esw_warn(esw->dev, "egress ACL is not supported by FW\n"); esw->mode = mode; mlx5_lag_update(esw->dev); - if (mode == SRIOV_LEGACY) { + if (mode == MLX5_ESWITCH_LEGACY) { err = esw_create_legacy_table(esw); if (err) goto abort; } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, vf_nvports, total_nvports); + err = esw_offloads_init(esw); } if (err) @@ -1743,11 +1809,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) esw_warn(esw->dev, "Failed to create eswitch TSAR"); - /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: - * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode - * 2. FDB/Eswitch is programmed by user space tools - */ - enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; + enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS : + UC_ADDR_CHANGE; /* Enable PF vport */ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); @@ -1760,22 +1823,21 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) esw_enable_vport(esw, vport, enabled_events); - if (mode == SRIOV_LEGACY) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - } + mlx5_eswitch_event_handlers_register(esw); + + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); - esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", - esw->enabled_vports); return 0; abort: - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; - if (mode == SRIOV_OFFLOADS) { + if (mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1783,23 +1845,22 @@ abort: return err; } -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; struct mlx5_vport *vport; int old_mode; int i; - if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) + if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", - esw->enabled_vports, esw->mode); + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); mc_promisc = &esw->mc_promisc; - - if (esw->mode == SRIOV_LEGACY) - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handlers_unregister(esw); mlx5_esw_for_all_vports(esw, i, vport) esw_disable_vport(esw, vport); @@ -1809,17 +1870,17 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_destroy_tsar(esw); - if (esw->mode == SRIOV_LEGACY) + if (esw->mode == MLX5_ESWITCH_LEGACY) esw_destroy_legacy_table(esw); - else if (esw->mode == SRIOV_OFFLOADS) + else if (esw->mode == MLX5_ESWITCH_OFFLOADS) esw_offloads_cleanup(esw); old_mode = esw->mode; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; mlx5_lag_update(esw->dev); - if (old_mode == SRIOV_OFFLOADS) { + if (old_mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1827,14 +1888,16 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int mlx5_eswitch_init(struct mlx5_core_dev *dev) { - int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; struct mlx5_vport *vport; + int total_vports; int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; + total_vports = mlx5_eswitch_get_total_vports(dev); + esw_info(dev, "Total vports %d, per vport: max uc(%d) max mc(%d)\n", total_vports, @@ -1847,6 +1910,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); + esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { @@ -1880,7 +1944,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->enabled_vports = 0; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) @@ -1950,7 +2014,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, ether_addr_copy(evport->info.mac, mac); evport->info.node_guid = node_guid; - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); unlock: @@ -2034,7 +2098,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, evport->info.vlan = vlan; evport->info.qos = qos; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) goto unlock; @@ -2076,7 +2140,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, mlx5_core_warn(esw->dev, "Spoofchk in set while MAC is invalid, vport(%d)\n", evport->vport); - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) evport->info.spoofchk = pschk; @@ -2172,7 +2236,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2195,7 +2259,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2338,7 +2402,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, 
u64 bytes = 0; int err = 0; - if (!vport->enabled || esw->mode != SRIOV_LEGACY) + if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) return 0; if (vport->egress.drop_counter) @@ -2448,16 +2512,27 @@ free_out: u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) { - return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; + return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - if ((dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE) || - (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) + if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) || + (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS)) return true; return false; @@ -2466,6 +2541,26 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS); + return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); +} + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) +{ + const u32 *out; + + WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d043d6f9797d..a38e8a3c7c9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -68,6 +68,8 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; + int modify_metadata_id; + struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; @@ -173,9 +175,12 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + DECLARE_HASHTABLE(termtbl_tbl, 8); + struct mutex termtbl_mutex; /* protects termtbl hash */ + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; - u8 encap; + enum devlink_eswitch_encap_mode encap; }; /* E-Switch MC FDB table hash node */ @@ -190,11 +195,15 @@ struct mlx5_host_work { struct mlx5_eswitch *esw; }; -struct mlx5_host_info { +struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; }; +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -202,6 +211,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct 
*work_queue; struct mlx5_vport *vports; + u32 flags; int total_vports; int enabled_vports; /* Synchronize between vport change events @@ -219,12 +229,12 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; - struct mlx5_host_info host_info; + u16 first_host_vport; + struct mlx5_esw_functions esw_funcs; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports); +int esw_offloads_init(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, @@ -239,12 +249,14 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -266,8 +278,32 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen); +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen); + struct mlx5_flow_spec; struct mlx5_esw_flow_attr; +struct mlx5_termtbl_handle; + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest); + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt); struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, @@ -338,6 +374,7 @@ struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *rep; struct mlx5_core_dev *mdev; u32 encap_id; + struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; @@ -355,10 +392,12 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +int 
mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, @@ -386,6 +425,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ @@ -404,6 +445,24 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; +} + +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) +{ + /* Ideally the device would expose the functions-changed capability + * on any device that has to process such events, i.e. the eswitch + * manager, whether it is the ECPF or the PF. However, some ECPF-based + * devices do not set this capability, so also accept ECPF eswitch + * managers to cover them. + */ + return MLX5_CAP_ESW(dev, esw_functions_changed) || + mlx5_core_is_ecpf_esw_manager(dev); +} + static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) { /* Uplink always locate at the last element of the array.*/ @@ -488,16 +547,47 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); #define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \ for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) +/* Includes host PF (vport 0) if it's not esw manager. */ +#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \ + for ((i) = (esw)->first_host_vport; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= (esw)->first_host_vport; (i)--) + +#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \ + for ((vport) = (esw)->first_host_vport; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= (esw)->first_host_vport; (vport)--) + struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } -static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; } +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline const u32 *mlx5_esw_query_functions(struct
mlx5_core_dev *dev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 47b446d30f71..8ed4497929b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -41,7 +41,6 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" -#include "ecpf.h" #include "lib/eq.h" /* There are two match-all miss flows, one for unicast dst mac and @@ -89,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) return 1; } +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. + */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, + attr->in_rep->vport)); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc))) + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -100,9 +146,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; - void *misc; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); flow_act.action = attr->action; @@ -160,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = 
MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { if (attr->tunnel_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -193,7 +225,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, + &flow_act, dest, i); + else + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -220,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - void *misc; int i; fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); @@ -252,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].ft = fwd_fdb, i++; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@ -295,8 +316,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, bool fwd_rule) { bool split = (attr->split_count > 0); + int i; mlx5_del_flow_rules(rule); + + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } + esw->offloads.num_flows--; if (fwd_rule) { @@ -328,12 +357,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) { struct mlx5_eswitch_rep *rep; - int vf_vport, err = 0; + int i, err = 0; esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? 
"pop" : "none"); - for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { - rep = &esw->offloads.vport_reps[vf_vport]; - if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED) + mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) { + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -559,23 +587,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, +static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, struct mlx5_flow_spec *spec, struct mlx5_flow_destination *dest) { - void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + void *misc; - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(peer_dev, vhca_id)); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, 
source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = peer_dev->priv.eswitch->manager_vport; @@ -583,6 +675,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { @@ -600,7 +712,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, if (!spec) return -ENOMEM; - peer_miss_rules_setup(peer_dev, spec, &dest); + peer_miss_rules_setup(esw, peer_dev, spec, &dest); flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); if (!flows) { @@ -613,7 +725,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc_parameters); if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -635,7 +749,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, i); + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -919,6 +1036,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 +static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1016,19 +1157,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create peer esw miss group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); + esw_set_flow_group_source_port(esw, flow_group_in); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria 
= MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + esw->total_vports - 1); @@ -1142,7 +1285,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; u32 *flow_group_in; - void *match_criteria, *misc; int err = 0; nvports = nvports + MLX5_ESW_MISS_FLOWS; @@ -1152,12 +1294,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) /* create vport rx group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + esw_set_flow_group_source_port(esw, flow_group_in); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1196,13 +1334,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, goto out; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, vport); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, @@ -1220,21 +1369,22 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - if (esw->mode != SRIOV_LEGACY && + if (esw->mode != MLX5_ESWITCH_LEGACY && !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; } - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + 
mlx5_eswitch_disable(esw); + mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -1242,7 +1392,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, - num_vfs, &esw->offloads.inline_mode)) { esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; NL_SET_ERR_MSG_MOD(extack, @@ -1259,11 +1408,11 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) int esw_offloads_init_reps(struct mlx5_eswitch *esw) { - int total_vports = MLX5_TOTAL_VPORTS(esw->dev); + int total_vports = esw->total_vports; struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; - int vport; + int vport_index; esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@ -1271,14 +1420,15 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - mlx5_query_nic_vport_mac_address(dev, 0, hw_id); + mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport, rep) { - rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); + mlx5_esw_for_all_reps(esw, vport_index, rep) { + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); + rep->vport_index = vport_index; ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } @@ -1288,9 +1438,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_LOADED, REP_REGISTERED) == REP_LOADED) - rep->rep_if[rep_type].unload(rep); + esw->offloads.rep_ops[rep_type]->unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1329,21 +1479,20 @@ static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) __unload_reps_vf_vport(esw, nvports, rep_type); } -static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { - __unload_reps_vf_vport(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); /* Special vports must be the last to unload. 
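(Mirroring the load path, where special vports go first because the uplink rep creates the mdev resource, unload runs in reverse: VF reps first, special vports last.)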
*/ __unload_reps_special_vport(esw, rep_type); } -static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); } static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, @@ -1351,11 +1500,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = rep->rep_if[rep_type].load(esw->dev, rep); + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); if (err) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); } @@ -1419,6 +1568,26 @@ err_vf: return err; } +static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + int err; + + /* Special vports must be loaded first, uplink rep creates mdev resource. */ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); + return err; +} + static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; @@ -1438,34 +1607,13 @@ err_reps: return err; } -static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) -{ - int err; - - /* Special vports must be loaded first. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; - - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_vfs; - - return 0; - -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, nvports, rep_type); + err = __load_reps_all_vport(esw, rep_type); if (err) goto err_reps; } @@ -1474,7 +1622,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); return err; } @@ -1510,6 +1658,10 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + err = mlx5_esw_offloads_pair(esw, peer_esw); if (err) goto err_out; @@ -1578,32 +1730,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; /* For prio tag mode, there is only 1 FTEs: - * 1) Untagged packets - push prio tag VLAN, allow + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow * Unmatched traffic is allowed by default */ - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - esw_vport_cleanup_ingress_rules(esw, vport); - - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - 
mlx5_core_warn(esw->dev, - "failed to enable prio tag ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - - esw_debug(esw->dev, - "vport[%d] configure ingress rules\n", vport->vport); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; @@ -1619,6 +1755,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].ethtype = ETH_P_8021Q; flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; + + if (vport->ingress.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_id = vport->ingress.modify_metadata_id; + } + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); @@ -1639,6 +1781,58 @@ out_no_mem: return err; } +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec spec = {}; + int err = 0; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + + err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action, &vport->ingress.modify_metadata_id); + if (err) { + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_id = vport->ingress.modify_metadata_id; + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + vport->ingress.modify_metadata_rule = NULL; + goto out; + } + +out: + if (err) + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + return err; +} + +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->ingress.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + + vport->ingress.modify_metadata_rule = NULL; + } +} + static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1646,6 +1840,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec; int err = 0; + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + /* For prio tag mode, there is only 1 FTEs: * 1) prio tag packets - pop the prio tag VLAN, allow * Unmatched traffic is allowed by default @@ -1699,27 +1896,98 @@ out_no_mem: return err; } -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports) +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = NULL; - int i, j; int err; - mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) { + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_ingress_rules(esw, vport); + + err = 
esw_vport_enable_ingress_acl(esw, vport); + if (err) { + esw_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); + if (err) + goto out; + } + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_vport_ingress_prio_tag_config(esw, vport); if (err) - goto err_ingress; - err = esw_vport_egress_prio_tag_config(esw, vport); + goto out; + } + +out: + if (err) + esw_vport_disable_ingress_acl(esw, vport); + return err; +} + +static bool +esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return false; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i, j; + int err; + + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_vport_ingress_common_config(esw, vport); if (err) - goto err_egress; + goto err_ingress; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_prio_tag_config(esw, vport); + if (err) + goto err_egress; + } } + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); + return 0; err_egress: esw_vport_disable_ingress_acl(esw, vport); err_ingress: - mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) { + for (j = MLX5_VPORT_PF; j < i; j++) { + vport = &esw->vports[j]; esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1727,40 +1995,46 @@ err_ingress: return err; } -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) { + mlx5_esw_for_all_vports(esw, i, vport) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } + + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } -static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports, - int nvports) +static int esw_offloads_steering_init(struct mlx5_eswitch *esw) { + int num_vfs = esw->esw_funcs.num_vfs; + int total_vports; int err; + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + total_vports = esw->total_vports; + else + total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev); + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { - err = esw_prio_tag_acls_config(esw, vf_nvports); - if (err) - return err; - } - - err = esw_create_offloads_fdb_tables(esw, nvports); + err = esw_create_offloads_acl_tables(esw); if (err) return err; - err = esw_create_offloads_table(esw, nvports); + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; + + err = 
esw_create_offloads_table(esw, total_vports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw, nvports); + err = esw_create_vport_rx_group(esw, total_vports); if (err) goto create_fg_err; @@ -1772,6 +2046,9 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_offloads_acl_tables(esw); + return err; } @@ -1780,88 +2057,105 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) - esw_prio_tag_acls_cleanup(esw); + esw_destroy_offloads_acl_tables(esw); } -static void esw_host_params_event_handler(struct work_struct *work) +static void +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) { - struct mlx5_host_work *host_work; - struct mlx5_eswitch *esw; - int err, num_vf = 0; + bool host_pf_disabled; + u16 new_num_vfs; - host_work = container_of(work, struct mlx5_host_work, work); - esw = host_work->esw; + new_num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + host_pf_disabled = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_pf_disabled); - err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); - if (err || num_vf == esw->host_info.num_vfs) - goto out; + if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) + return; /* Number of VFs can only change from "0 to x" or "x to 0". */ - if (esw->host_info.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + if (esw->esw_funcs.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); } else { - err = esw_offloads_load_vf_reps(esw, num_vf); + int err; + err = esw_offloads_load_vf_reps(esw, new_num_vfs); if (err) - goto out; + return; } + esw->esw_funcs.num_vfs = new_num_vfs; +} + +static void esw_functions_changed_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + const u32 *out; - esw->host_info.num_vfs = num_vf; + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + goto out; + + esw_vfs_changed_event_handler(esw, out); + kvfree(out); out: kfree(host_work); } -static int esw_host_params_event(struct notifier_block *nb, - unsigned long type, void *data) +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_esw_functions *esw_funcs; struct mlx5_host_work *host_work; - struct mlx5_host_info *host_info; struct mlx5_eswitch *esw; host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); if (!host_work) return NOTIFY_DONE; - host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); - esw = container_of(host_info, struct mlx5_eswitch, host_info); + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; - INIT_WORK(&host_work->work, esw_host_params_event_handler); + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports) +int esw_offloads_init(struct mlx5_eswitch *esw) { int err; - err = esw_offloads_steering_init(esw, vf_nvports, total_nvports); + err = esw_offloads_steering_init(esw); if (err) return err; - err = esw_offloads_load_all_reps(esw, 
vf_nvports); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = mlx5_eswitch_enable_passing_vport_metadata(esw); + if (err) + goto err_vport_metadata; + } + + err = esw_offloads_load_all_reps(esw); if (err) goto err_reps; esw_offloads_devcom_init(esw); - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, - HOST_PARAMS_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); - esw->host_info.num_vfs = vf_nvports; - } + mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); return 0; err_reps: + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); +err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; } @@ -1869,13 +2163,13 @@ err_reps: static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_disable(esw); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -1887,19 +2181,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs; - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); - flush_workqueue(esw->work_queue); - num_vfs = esw->host_info.num_vfs; - } else { - num_vfs = esw->dev->priv.sriov.num_vfs; - } - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); esw_offloads_steering_cleanup(esw); } @@ -1907,10 +2193,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) { switch (mode) { case DEVLINK_ESWITCH_MODE_LEGACY: - *mlx5_mode = SRIOV_LEGACY; + *mlx5_mode = MLX5_ESWITCH_LEGACY; break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: - *mlx5_mode = SRIOV_OFFLOADS; + *mlx5_mode = MLX5_ESWITCH_OFFLOADS; break; default: return -EINVAL; @@ -1922,10 +2208,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) { switch (mlx5_mode) { - case SRIOV_LEGACY: + case MLX5_ESWITCH_LEGACY: *mode = DEVLINK_ESWITCH_MODE_LEGACY; break; - case SRIOV_OFFLOADS: + case MLX5_ESWITCH_OFFLOADS: *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; break; default: @@ -1989,7 +2275,7 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE && + if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE && !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; @@ -2040,7 +2326,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; - int err, vport; + int err, vport, num_vport; u8 mlx5_mode; err = mlx5_devlink_eswitch_check(devlink); @@ -2069,7 +2355,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; - for (vport = 1; 
vport < esw->enabled_vports; vport++) { + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); if (err) { NL_SET_ERR_MSG_MOD(extack, @@ -2082,7 +2368,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, return 0; revert_inline_mode: - while (--vport > 0) + num_vport = --vport; + mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport) mlx5_modify_nic_vport_min_inline(dev, vport, esw->offloads.inline_mode); @@ -2103,7 +2390,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) { u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; @@ -2112,7 +2399,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - if (esw->mode == SRIOV_NONE) + if (esw->mode == MLX5_ESWITCH_NONE) return -EOPNOTSUPP; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { @@ -2127,9 +2414,10 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) } query_vports: - for (vport = 1; vport <= nvfs; vport++) { + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); - if (vport > 1 && prev_mlx5_mode != mlx5_mode) + if (prev_mlx5_mode != mlx5_mode) return -EINVAL; prev_mlx5_mode = mlx5_mode; } @@ -2139,7 +2427,8 @@ out: return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -2158,7 +2447,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) return -EOPNOTSUPP; - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; return 0; } @@ -2188,7 +2477,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; } -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -2203,36 +2493,31 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) } void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *__rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) { - struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; int i; + esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_if = &rep->rep_if[rep_type]; - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - atomic_set(&rep_if->state, REP_REGISTERED); + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); } } 
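/* Illustrative only, not part of the patch: with the shared ops table, a
 * consumer now registers one const callback set per rep type instead of
 * copying callbacks into every rep. A minimal sketch, callback names
 * hypothetical:
 *
 *	static const struct mlx5_eswitch_rep_ops my_eth_rep_ops = {
 *		.load		= my_rep_load,
 *		.unload		= my_rep_unload,
 *		.get_proto_dev	= my_rep_get_proto_dev,
 *	};
 *
 *	mlx5_eswitch_register_vport_reps(esw, &my_eth_rep_ops, REP_ETH);
 */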
EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; int i; - if (esw->mode == SRIOV_OFFLOADS) - __unload_reps_all_vport(esw, max_vf, rep_type); + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + __unload_reps_all_vport(esw, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED); + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2241,7 +2526,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - return rep->rep_if[rep_type].priv; + return rep->rep_data[rep_type].priv; } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, @@ -2252,9 +2537,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED && - rep->rep_if[rep_type].get_proto_dev) - return rep->rep_if[rep_type].get_proto_dev(rep); + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); return NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); @@ -2271,3 +2556,22 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num >= MLX5_VPORT_FIRST_VF && + vport_num <= esw->dev->priv.sriov.max_vfs; +} + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num) +{ + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c new file mode 100644 index 000000000000..1d55a324a17e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. 
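/* Reader's sketch, not part of the patch: a termination table wraps a vport
 * destination behind a single-rule flow table so that packet-modifying
 * actions (here, VLAN push on traffic received from the uplink) execute at a
 * well-defined terminating hop. The caller in eswitch_offloads.c selects it
 * like this:
 *
 *	if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec))
 *		rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
 *						     &flow_act, dest, i);
 *	else
 *		rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
 *
 * Tables are deduplicated by (action, vlan, destination) via the hash table
 * below and reference-counted through mlx5_eswitch_termtbl_put().
 */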
+ +#include <linux/mlx5/fs.h> +#include "eswitch.h" + +struct mlx5_termtbl_handle { + struct hlist_node termtbl_hlist; + + struct mlx5_flow_table *termtbl; + struct mlx5_flow_act flow_act; + struct mlx5_flow_destination dest; + + struct mlx5_flow_handle *rule; + int ref_count; +}; + +static u32 +mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + u32 hash; + + hash = jhash_1word(flow_act->action, 0); + hash = jhash((const void *)&flow_act->vlan, + sizeof(flow_act->vlan), hash); + hash = jhash((const void *)&dest->vport.num, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.vhca_id), hash); + return hash; +} + +static int +mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + struct mlx5_flow_destination *dest1, + struct mlx5_flow_act *flow_act2, + struct mlx5_flow_destination *dest2) +{ + return flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); +} + +static int +mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, + struct mlx5_termtbl_handle *tt, + struct mlx5_flow_act *flow_act) +{ + static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_namespace *root_ns; + int prio, flags; + int err; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* As this is the terminating action, the termination table uses the + * same prio as the slow path. + */ + prio = FDB_SLOW_PATH; + flags = MLX5_FLOW_TABLE_TERMINATION; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, + 0, flags); + if (IS_ERR(tt->termtbl)) { + esw_warn(dev, "Failed to create termination table\n"); + return -EOPNOTSUPP; + } + + tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + &tt->dest, 1); + + if (IS_ERR(tt->rule)) { + esw_warn(dev, "Failed to create termination table rule\n"); + goto add_flow_err; + } + return 0; + +add_flow_err: + err = mlx5_destroy_flow_table(tt->termtbl); + if (err) + esw_warn(dev, "Failed to destroy termination table\n"); + + return -EOPNOTSUPP; +} + +static struct mlx5_termtbl_handle * +mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + struct mlx5_termtbl_handle *tt; + bool found = false; + u32 hash_key; + int err; + + mutex_lock(&esw->offloads.termtbl_mutex); + + hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); + hash_for_each_possible(esw->offloads.termtbl_tbl, tt, + termtbl_hlist, hash_key) { + if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest, + flow_act, dest)) { + found = true; + break; + } + } + if (found) + goto tt_add_ref; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) { + err = -ENOMEM; + goto tt_create_err; + } + + tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tt->dest.vport.num = dest->vport.num; + tt->dest.vport.vhca_id = dest->vport.vhca_id; + memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); + + err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); + if (err) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto tt_create_err; + } + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); +tt_add_ref: + tt->ref_count++; + mutex_unlock(&esw->offloads.termtbl_mutex); + return tt; +tt_create_err: + kfree(tt); + 
mutex_unlock(&esw->offloads.termtbl_mutex); + return ERR_PTR(err); +} + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt) +{ + mutex_lock(&esw->offloads.termtbl_mutex); + if (--tt->ref_count == 0) + hash_del(&tt->termtbl_hlist); + mutex_unlock(&esw->offloads.termtbl_mutex); + + if (!tt->ref_count) { + mlx5_del_flow_rules(tt->rule); + mlx5_destroy_flow_table(tt->termtbl); + kfree(tt); + } +} + +static void +mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, + struct mlx5_flow_act *dst) +{ + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); +} + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) +{ + u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + u32 port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + return false; + + /* push vlan on RX */ + return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && + ((port_mask & port_value) == MLX5_VPORT_UPLINK); +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_act term_tbl_act = {}; + struct mlx5_flow_handle *rule = NULL; + bool term_table_created = false; + int num_vport_dests = 0; + int i, curr_dest; + + mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + for (i = 0; i < num_dest; i++) { + struct mlx5_termtbl_handle *tt; + + /* only vport destinations can be terminated */ + if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + /* get the terminating table for the action list */ + tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, + &dest[i]); + if (IS_ERR(tt)) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto revert_changes; + } + attr->dests[num_vport_dests].termtbl = tt; + num_vport_dests++; + + /* link the destination with the termination table */ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = tt->termtbl; + term_table_created = true; + } + + /* at least one destination should reference a termination table */ + if (!term_table_created) + goto revert_changes; + + /* create the FTE */ + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); + if (IS_ERR(rule)) + goto revert_changes; + + goto out; + +revert_changes: + /* revert the changes that were made to the original flow_act + * and fall-back to the original rule actions + */ + mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act); + + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + + /* search for the 
destination associated with the + * current term table + */ + for (i = 0; i < num_dest; i++) { + if (dest[i].ft != tt->termtbl) + continue; + + memset(&dest[i], 0, sizeof(dest[i])); + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = tt->dest.vport.num; + dest[i].vport.vhca_id = tt->dest.vport.vhca_id; + mlx5_eswitch_termtbl_put(esw, tt); + break; + } + } + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); +out: + return rule; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index a81e8d2168d8..8bcf3426b9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: - return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index ca2296a2f9ee..4c50efe4e7f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -414,7 +414,8 @@ static void mlx5_fpga_conn_cq_tasklet(unsigned long data) mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); } -static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq) +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) { struct mlx5_fpga_conn *conn; @@ -429,6 +430,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_fpga_device *fdev = conn->fdev; struct mlx5_core_dev *mdev = fdev->mdev; u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; @@ -476,7 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas); - err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen); + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) @@ -867,7 +869,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, conn->cb_arg = attr->cb_arg; remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); - err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac); + err = mlx5_query_mac_address(fdev->mdev, remote_mac); if (err) { mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); ret = ERR_PTR(err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 52c47d3dd5a5..c76da309506b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, u8 match_criteria_enable, const u32 *match_c, const u32 *match_v, - struct mlx5_flow_act *flow_act) + struct mlx5_flow_act *flow_act, + struct mlx5_flow_context *flow_context) { const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -655,7 +656,7 @@ static bool 
mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_act->flags & FLOW_ACT_HAS_TAG)) + (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) return false; return true; @@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, fg->mask.match_criteria_enable, fg->mask.match_criteria, fte->val, - &fte->action)) + &fte->action, + &fte->flow_context)) return ERR_PTR(-EINVAL); else if (!mlx5_is_fpga_ipsec_rule(mdev, fg->mask.match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h index 2b5e63b0d4d6..382985e65b48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -37,8 +37,6 @@ #include "accel/ipsec.h" #include "fs_cmd.h" -#ifdef CONFIG_MLX5_FPGA - u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, @@ -66,77 +64,4 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_flow_cmds * mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -#else - -static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline unsigned int -mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, - u64 *counters) -{ - return 0; -} - -static inline void * -mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], - const __be32 daddr[4], - const __be32 spi, bool is_ipv6) -{ - return NULL; -} - -static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context) -{ -} - -static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ -} - -static inline void mlx5_fpga_ipsec_build_fs_cmds(void) -{ -} - -static inline struct mlx5_accel_esp_xfrm * -mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ -} - -static inline int -mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - return -EOPNOTSUPP; -} - -static inline const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - return mlx5_fs_cmd_get_default(type); -} - -#endif /* CONFIG_MLX5_FPGA */ - #endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 013b1ca4a791..7ac1249eadc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, { int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = 
{0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; struct mlx5_core_dev *dev = ns->dev; @@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, en_decap); MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); switch (ft->op_mod) { case FS_FT_OP_MOD_NORMAL: @@ -393,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { @@ -768,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fe76c6fd6d80..3e99799bdb40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -584,7 +584,7 @@ err_ida_remove: } static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act) { struct mlx5_flow_steering *steering = get_steering(&ft->node); @@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, if (!fte) return ERR_PTR(-ENOMEM); - memcpy(fte->val, match_value, sizeof(fte->val)); + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; + fte->flow_context = spec->flow_context; tree_init_node(&fte->node, NULL, del_sw_fte); @@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering, static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index) { @@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index, struct list_head *prev) @@ -1285,7 +1286,7 @@ free_handle: } static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; struct mlx5_flow_group *fg; @@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; } -static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) { if (check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), @@ -1438,12 
+1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if ((flow_act->flags & FLOW_ACT_HAS_TAG) && - fte->action.flow_tag != flow_act->flow_tag) { + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->action.flow_tag, - flow_act->flow_tag); + fte->flow_context.flow_tag, + flow_context->flow_tag); return -EEXIST; } @@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act } static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int i; int ret; - ret = check_conflicting_ftes(fte, flow_act); + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); if (ret) return ERR_PTR(ret); @@ -1536,7 +1539,7 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1589,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head) static struct fs_fte * lookup_fte_locked(struct mlx5_flow_group *g, - u32 *match_value, + const u32 *match_value, bool take_write) { struct fs_fte *fte_tmp; @@ -1622,7 +1625,7 @@ out: static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct list_head *match_head, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1653,8 +1656,7 @@ search_again_locked: fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); if (!fte_tmp) continue; - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte_tmp); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1701,8 +1703,7 @@ skip_search: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); return rule; @@ -1715,7 +1716,7 @@ out: static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) @@ -1788,7 +1789,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1802,8 +1803,7 @@ search_again_locked: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = 
add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); tree_put_node(&g->node, false); @@ -1823,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int num_dest) @@ -2092,7 +2092,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d { struct mlx5_flow_steering *steering = dev->priv.steering; - if (!steering || vport >= MLX5_TOTAL_VPORTS(dev)) + if (!steering || vport >= mlx5_eswitch_get_total_vports(dev)) return NULL; switch (type) { @@ -2423,7 +2423,7 @@ static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_egress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_egress_root_ns[i]); kfree(steering->esw_egress_root_ns); @@ -2438,7 +2438,7 @@ static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_ingress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_ingress_root_ns[i]); kfree(steering->esw_ingress_root_ns); @@ -2606,16 +2606,18 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_egress_root_ns), - GFP_KERNEL); + steering->esw_egress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_egress_root_ns), + GFP_KERNEL); if (!steering->esw_egress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_egress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; @@ -2634,16 +2636,18 @@ cleanup_root_ns: static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_ingress_root_ns), - GFP_KERNEL); + steering->esw_ingress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_ingress_root_ns), + GFP_KERNEL); if (!steering->esw_ingress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_ingress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a08c3d09a50f..c48c382f926f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -170,6 +170,7 @@ struct fs_fte { u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; + struct mlx5_flow_context flow_context; struct mlx5_flow_act action; enum fs_fte_status status; struct mlx5_fc *counter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index c6c28f56aa29..b3762123a69c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -102,13 +102,15 @@ static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; unsigned long next_id = (unsigned long)id + 1; struct mlx5_fc *counter; + unsigned long tmp; rcu_read_lock(); /* skip counters that are in idr, but not yet in counters list */ - while ((counter = idr_get_next_ul(&fc_stats->counters_idr, - &next_id)) != NULL && - list_empty(&counter->list)) - next_id++; + idr_for_each_entry_continue_ul(&fc_stats->counters_idr, + counter, tmp, next_id) { + if (!list_empty(&counter->list)) + break; + } rcu_read_unlock(); return counter ? &counter->list : &fc_stats->counters; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 1ab6f7e3bec6..a19790dee7b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -37,6 +37,37 @@ #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" +enum { + MCQS_IDENTIFIER_BOOT_IMG = 0x1, + MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4, + MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5, + MCQS_IDENTIFIER_CS_TOKEN = 0x6, + MCQS_IDENTIFIER_DBG_TOKEN = 0x7, + MCQS_IDENTIFIER_GEARBOX = 0xA, +}; + +enum { + MCQS_UPDATE_STATE_IDLE, + MCQS_UPDATE_STATE_IN_PROGRESS, + MCQS_UPDATE_STATE_APPLIED, + MCQS_UPDATE_STATE_ACTIVE, + MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET, + MCQS_UPDATE_STATE_FAILED, + MCQS_UPDATE_STATE_CANCELED, + MCQS_UPDATE_STATE_BUSY, +}; + +enum { + MCQI_INFO_TYPE_CAPABILITIES = 0x0, + MCQI_INFO_TYPE_VERSION = 0x1, + MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5, +}; + +enum { + MCQI_FW_RUNNING_VERSION = 0, + MCQI_FW_STORED_VERSION = 1, +}; + static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { @@ -202,6 +233,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, event_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, tls)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); + if (err) + return err; + } + return 0; } @@ -392,33 +435,49 @@ static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, } static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, - u16 component_index, - u32 *max_component_size, - u8 *log_mcda_word_size, - u16 *mcda_max_write_size) + u16 component_index, bool read_pending, + u8 info_type, u16 data_size, void *mcqi_data) { - u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)]; - int offset = MLX5_ST_SZ_DW(mcqi_reg); - u32 in[MLX5_ST_SZ_DW(mcqi_reg)]; + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {}; + u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {}; + void *data; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(mcqi_reg, in, component_index, component_index); - MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap)); + MLX5_SET(mcqi_reg, in, read_pending_component, read_pending); + MLX5_SET(mcqi_reg, in, info_type, info_type); + MLX5_SET(mcqi_reg, in, data_size, data_size); err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_MCQI, 0, 0); + MLX5_ST_SZ_BYTES(mcqi_reg) + data_size, + MLX5_REG_MCQI, 0, 0); if (err) - goto out; + return err; - *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size); - *log_mcda_word_size = 
MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size); - *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size); + data = MLX5_ADDR_OF(mcqi_reg, out, data); + memcpy(mcqi_data, data, data_size); -out: - return err; + return 0; +} + +static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index, + u32 *max_component_size, u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {}; + int err; + + err = mlx5_reg_mcqi_query(dev, component_index, 0, + MCQI_INFO_TYPE_CAPABILITIES, + MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg); + if (err) + return err; + + *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size); + + return 0; } struct mlx5_mlxfw_dev { @@ -434,8 +493,13 @@ static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; - return mlx5_reg_mcqi_query(dev, component_index, p_max_size, - p_align_bits, p_max_write_size); + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) { + mlx5_core_warn(dev, "caps query isn't supported by running FW\n"); + return -EOPNOTSUPP; + } + + return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); } static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) @@ -552,7 +616,8 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { }; int mlx5_firmware_flash(struct mlx5_core_dev *dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { .mlxfw_dev = { @@ -571,5 +636,133 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, return -EOPNOTSUPP; } - return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware); + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, + firmware, extack); +} + +static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev, + u16 component_index, bool read_pending, + u32 *mcqi_version_out) +{ + return mlx5_reg_mcqi_query(dev, component_index, read_pending, + MCQI_INFO_TYPE_VERSION, + MLX5_ST_SZ_BYTES(mcqi_version), + mcqi_version_out); +} + +static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out, + u16 component_index) +{ + u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg); + u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + int err; + + memset(out, 0, out_sz); + + MLX5_SET(mcqs_reg, in, component_index, component_index); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + out_sz, MLX5_REG_MCQS, 0, 0); + return err; +} + +/* scans component index sequentially, to find the boot img index */ +static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + u16 identifier, component_idx = 0; + bool quit; + int err; + + do { + err = mlx5_reg_mcqs_query(dev, out, component_idx); + if (err) + return err; + + identifier = MLX5_GET(mcqs_reg, out, identifier); + quit = !!MLX5_GET(mcqs_reg, out, last_index_flag); + quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG; + } while (!quit && ++component_idx); + + if (identifier != MCQS_IDENTIFIER_BOOT_IMG) { + mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n", + component_idx); + return -EOPNOTSUPP; + } + + return component_idx; +} + +static int +mlx5_fw_image_pending(struct mlx5_core_dev *dev, + 
int component_index, + bool *pending_version_exists) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)]; + u8 component_update_state; + int err; + + err = mlx5_reg_mcqs_query(dev, out, component_index); + if (err) + return err; + + component_update_state = MLX5_GET(mcqs_reg, out, component_update_state); + + if (component_update_state == MCQS_UPDATE_STATE_IDLE) { + *pending_version_exists = false; + } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) { + *pending_version_exists = true; + } else { + mlx5_core_warn(dev, + "mcqs: can't read pending fw version while fw state is %d\n", + component_update_state); + return -ENODATA; + } + return 0; +} + +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) +{ + u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; + bool pending_version_exists; + int component_index; + int err; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcqs)) { + mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); + return -EOPNOTSUPP; + } + + component_index = mlx5_get_boot_img_component_index(dev); + if (component_index < 0) + return component_index; + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_RUNNING_VERSION, + reg_mcqi_version); + if (err) + return err; + + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); + if (err) + return err; + + if (!pending_version_exists) { + *pending_ver = 0; + return 0; + } + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_STORED_VERSION, + reg_mcqi_version); + if (err) + return err; + + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a2656f4008d9..2fe6923f7ce0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -40,6 +40,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +#include "lib/pci_vsc.h" +#include "diag/fw_tracer.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -62,12 +64,20 @@ enum { enum { MLX5_DROP_NEW_HEALTH_WORK, - MLX5_DROP_NEW_RECOVERY_WORK, +}; + +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, + MLX5_SENSOR_FW_SYND_RFR = 5, }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) { - return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; } void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) @@ -80,18 +90,105 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static int in_fatal(struct mlx5_core_dev *dev) +static bool sensor_pci_not_working(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; + /* Offline PCI reads return 0xffffffff */ + return (ioread32be(&h->fw_ver) == 0xffffffff); +} + +static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; + u8 synd = ioread8(&h->synd); + + if (rfr && synd) + mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); + return rfr && synd; +} + 
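+/* The two helpers above are the raw health sensors; check_fatal_sensors()
+ * below polls them in a fixed order of precedence and reports the first one
+ * that fires. Editorial sketch (not part of the original patch), derived
+ * from the masks in mlx5_get_nic_state() above: the NIC interface mode is
+ * the 3-bit field at bits 10:8 of iseg->cmdq_addr_l_sz, so widening the
+ * mask from 0x3 to 0x7 is what makes the new SW-reset mode (which reads
+ * back as 0x7, see mlx5_handle_bad_state() below) observable:
+ *
+ *	u8 mode = (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 0x7;
+ *	// mode == 7 -> MLX5_NIC_IFC_SW_RESET
+ */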
+static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_not_working(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) - return 1; + return MLX5_SENSOR_NIC_DISABLED; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) + return MLX5_SENSOR_NIC_SW_RESET; + if (sensor_fw_synd_rfr(dev)) + return MLX5_SENSOR_FW_SYND_RFR; - if (ioread32be(&h->fw_ver) == 0xffffffff) - return 1; + return MLX5_SENSOR_NO_ERR; - return 0; } + +static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) +{ + enum mlx5_vsc_state state; + int ret; + + if (!mlx5_core_is_pf(dev)) + return -EBUSY; + + /* Try to lock GW access; this stage doesn't return + * EBUSY because a locked GW does not mean that another PF + * already started the reset. + */ + ret = mlx5_vsc_gw_lock(dev); + if (ret == -EBUSY) + return -EINVAL; + if (ret) + return ret; + + state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; + /* At this stage, if the return status == EBUSY, then we know + * for sure that another PF started the reset, so don't allow + * another reset. + */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); + if (ret) + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + + /* Unlock GW access */ + mlx5_vsc_gw_unlock(dev); + + return ret; +} + +static bool reset_fw_if_needed(struct mlx5_core_dev *dev) +{ + bool supported = (ioread32be(&dev->iseg->initializing) >> + MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; + u32 fatal_error; + + if (!supported) + return false; + + /* The reset only needs to be issued by one PF. The health buffer is + * shared between all functions, and will be cleared during a reset. + * Check again to avoid a redundant 2nd reset. If the fatal error was + * PCI related, a reset won't help. + */ + fatal_error = check_fatal_sensors(dev); + if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || + fatal_error == MLX5_SENSOR_NIC_DISABLED || + fatal_error == MLX5_SENSOR_NIC_SW_RESET) { + mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n"); + return false; + } + + mlx5_core_warn(dev, "Issuing FW Reset\n"); + /* Write the NIC interface field to initiate the reset; the command + * interface address also resides here, don't overwrite it. 
+ */ + mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET); + + return true; } void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) @@ -99,14 +196,65 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; + if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + goto unlock; + } - mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { + if (check_fatal_sensors(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +#define MLX5_CRDUMP_WAIT_MS 60000 +#define MLX5_FW_RESET_WAIT_MS 1000 +void mlx5_error_sw_reset(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS; + int lock = -EBUSY; + + mutex_lock(&dev->intf_state_mutex); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + + if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + /* Get cr-dump and reset FW semaphore */ + lock = lock_sem_sw_reset(dev, true); + + if (lock == -EBUSY) { + delay_ms = MLX5_CRDUMP_WAIT_MS; + goto recover_from_sw_reset; + } + /* Execute SW reset */ + reset_fw_if_needed(dev); + } + +recover_from_sw_reset: + /* Recover from SW reset */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + } + + /* Release FW semaphore if you are the lock owner */ + if (!lock) + lock_sem_sw_reset(dev, false); + mlx5_core_err(dev, "end\n"); unlock: @@ -129,6 +277,20 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) case MLX5_NIC_IFC_NO_DRAM_NIC: mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); break; + + case MLX5_NIC_IFC_SW_RESET: + /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. 
+ */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset in progress\n"); + break; + default: mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", nic_interface); @@ -137,52 +299,32 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } -static void health_recover(struct work_struct *work) -{ - struct mlx5_core_health *health; - struct delayed_work *dwork; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - u8 nic_state; - - dwork = container_of(work, struct delayed_work, work); - health = container_of(dwork, struct mlx5_core_health, recover_work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - - nic_state = mlx5_get_nic_state(dev); - if (nic_state == MLX5_NIC_IFC_INVALID) { - mlx5_core_err(dev, "health recovery flow aborted since the nic state is invalid\n"); - return; - } - - mlx5_core_err(dev, "starting health recovery flow\n"); - mlx5_recover_device(dev); -} - /* How much time to wait until health resetting the driver (in msecs) */ -#define MLX5_RECOVERY_DELAY_MSECS 60000 -static void health_care(struct work_struct *work) +#define MLX5_RECOVERY_WAIT_MSECS 60000 +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) { - unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); - struct mlx5_core_health *health; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - unsigned long flags; + unsigned long end; - health = container_of(work, struct mlx5_core_health, work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); + end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); + while (sensor_pci_not_working(dev)) { + if (time_after(jiffies, end)) { + mlx5_core_err(dev, + "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } + msleep(100); + } - spin_lock_irqsave(&health->wq_lock, flags); - if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) - schedule_delayed_work(&health->recover_work, recover_delay); - else - mlx5_core_err(dev, - "new health works are not permitted at this stage\n"); - spin_unlock_irqrestore(&health->wq_lock, flags); + mlx5_core_err(dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || + check_fatal_sensors(dev)) { + mlx5_core_err(dev, "health recovery failed\n"); + return -EIO; + } + return 0; } static const char *hsynd_str(u8 synd) @@ -246,6 +388,282 @@ static void print_health_info(struct mlx5_core_dev *dev) mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw); } +static int +mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd; + int err; + + synd = ioread8(&h->synd); + err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); + if (err || !synd) + return err; + return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); +} + +struct mlx5_fw_reporter_ctx { + u8 err_synd; + int miss_counter; +}; + +static int +mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg, + struct mlx5_fw_reporter_ctx *fw_reporter_ctx) +{ + int err; + + err = 
devlink_fmsg_u8_pair_put(fmsg, "syndrome", + fw_reporter_ctx->err_synd); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", + fw_reporter_ctx->miss_counter); + if (err) + return err; + return 0; +} + +static int +mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + int err; + int i; + + if (!ioread8(&h->synd)) + return 0; + + err = devlink_fmsg_pair_nest_start(fmsg, "health buffer"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) { + err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i)); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr", + ioread32be(&h->assert_exit_ptr)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra", + ioread32be(&h->assert_callra)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index", + ioread8(&h->irisc_index)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", + ioread16be(&h->ext_synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", + ioread32be(&h->fw_ver)); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return devlink_fmsg_pair_nest_end(fmsg); +} + +static int +mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + int err; + + err = mlx5_fw_tracer_trigger_core_dump_general(dev); + if (err) + return err; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + return err; + } + + err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg); + if (err) + return err; + return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg); +} + +static void mlx5_fw_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + + health = container_of(work, struct mlx5_core_health, report_work); + + if (IS_ERR_OR_NULL(health->fw_reporter)) + return; + + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (fw_reporter_ctx.err_synd) { + devlink_health_report(health->fw_reporter, + "FW syndrom reported", &fw_reporter_ctx); + return; + } + if (fw_reporter_ctx.miss_counter) + devlink_health_report(health->fw_reporter, + "FW miss counter reported", + &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { + .name = "fw", + .diagnose = mlx5_fw_reporter_diagnose, + .dump = mlx5_fw_reporter_dump, +}; + +static int +mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_health_try_recover(dev); +} + +#define MLX5_CR_DUMP_CHUNK_SIZE 256 +static int 
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + u32 crdump_size = dev->priv.health.crdump_size; + u32 *cr_data; + u32 data_size; + u32 offset; + int err; + + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + cr_data = kvmalloc(crdump_size, GFP_KERNEL); + if (!cr_data) + return -ENOMEM; + err = mlx5_crdump_collect(dev, cr_data); + if (err) + goto free_data; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + goto free_data; + } + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data"); + if (err) + goto free_data; + for (offset = 0; offset < crdump_size; offset += data_size) { + if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE) + data_size = crdump_size - offset; + else + data_size = MLX5_CR_DUMP_CHUNK_SIZE; + err = devlink_fmsg_binary_put(fmsg, cr_data, data_size); + if (err) + goto free_data; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + +free_data: + kvfree(cr_data); + return err; +} + +static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, fatal_report_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + mlx5_enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + if (mlx5_health_try_recover(dev)) + mlx5_core_err(dev, "health recovery failed\n"); + return; + } + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, + .dump = mlx5_fw_fatal_reporter_dump, +}; + +#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000 +static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->fw_reporter = + devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, false, dev); + if (IS_ERR(health->fw_reporter)) + mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(health->fw_reporter)); + + health->fw_fatal_reporter = + devlink_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + MLX5_REPORTER_FW_GRACEFUL_PERIOD, + true, dev); + if (IS_ERR(health->fw_fatal_reporter)) + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", + PTR_ERR(health->fw_fatal_reporter)); +} + +static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->fw_reporter)) + devlink_health_reporter_destroy(health->fw_reporter); + + if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) + devlink_health_reporter_destroy(health->fw_fatal_reporter); +} + static unsigned long get_next_poll_jiffies(void) { unsigned long next; @@ -264,7 +682,7 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) - queue_work(health->wq, 
&health->work); + queue_work(health->wq, &health->fatal_report_work); else mlx5_core_err(dev, "new health works are not permitted at this stage\n"); spin_unlock_irqrestore(&health->wq_lock, flags); @@ -274,6 +692,9 @@ static void poll_health(struct timer_list *t) { struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 fatal_error; + u8 prev_synd; u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -289,10 +710,19 @@ static void poll_health(struct timer_list *t) if (health->miss_counter == MAX_MISSES) { mlx5_core_err(dev, "device's health compromised - reached miss count\n"); print_health_info(dev); + queue_work(health->wq, &health->report_work); } - if (in_fatal(dev) && !health->sick) { - health->sick = true; + prev_synd = health->synd; + health->synd = ioread8(&h->synd); + if (health->synd && health->synd != prev_synd) + queue_work(health->wq, &health->report_work); + + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; print_health_info(dev); mlx5_trigger_health_work(dev); } @@ -306,9 +736,8 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; timer_setup(&health->timer, poll_health, 0); - health->sick = 0; + health->fatal_error = MLX5_SENSOR_NO_ERR; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -324,7 +753,6 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) if (disable_health) { spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); } @@ -338,21 +766,9 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&health->recover_work); - cancel_work_sync(&health->work); -} - -void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) -{ - struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - - spin_lock_irqsave(&health->wq_lock, flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); - spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&dev->priv.health.recover_work); + cancel_work_sync(&health->report_work); + cancel_work_sync(&health->fatal_report_work); } void mlx5_health_flush(struct mlx5_core_dev *dev) @@ -367,6 +783,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; destroy_workqueue(health->wq); + mlx5_fw_reporters_destroy(dev); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -374,20 +791,26 @@ int mlx5_health_init(struct mlx5_core_dev *dev) struct mlx5_core_health *health; char *name; + mlx5_fw_reporters_create(dev); + health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); if (!name) - return -ENOMEM; + goto out_err; strcpy(name, "mlx5_health"); strcat(name, dev_name(dev->device)); health->wq = create_singlethread_workqueue(name); kfree(name); if (!health->wq) - return 
-ENOMEM; + goto out_err; spin_lock_init(&health->wq_lock); - INIT_WORK(&health->work, health_care); - INIT_DELAYED_WORK(&health->recover_work, health_recover); + INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); + INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); return 0; + +out_err: + mlx5_fw_reporters_destroy(dev); + return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 90cb50fe17fd..ebd81f6b556e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -122,14 +122,6 @@ static int mlx5i_get_ts_info(struct net_device *netdev, return mlx5e_ethtool_get_ts_info(priv, info); } -static int mlx5i_flash_device(struct net_device *netdev, - struct ethtool_flash *flash) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - - return mlx5e_ethtool_flash_device(priv, flash); -} - enum mlx5_ptys_width { MLX5_PTYS_WIDTH_1X = 1 << 0, MLX5_PTYS_WIDTH_2X = 1 << 1, @@ -241,7 +233,6 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_ethtool_stats = mlx5i_get_ethtool_stats, .get_ringparam = mlx5i_get_ringparam, .set_ringparam = mlx5i_set_ringparam, - .flash_device = mlx5i_flash_device, .get_channels = mlx5i_get_channels, .set_channels = mlx5i_set_channels, .get_coalesce = mlx5i_get_coalesce, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 9ca492b430d8..faf197d53743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params, + mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -258,6 +258,18 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp * mlx5_core_destroy_qp(mdev, qp); } +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); + + return mlx5e_create_tis(mdev, in, tisn); +} + static int mlx5i_init_tx(struct mlx5e_priv *priv) { struct mlx5i_priv *ipriv = priv->ppriv; @@ -269,7 +281,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); goto err_destroy_underlay_qp; @@ -365,7 +377,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -373,7 +385,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -384,11 +396,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - 
mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -401,9 +413,9 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -418,6 +430,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .cleanup_rx = mlx5i_cleanup_rx, .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, /* mlx5i_update_stats */ .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, @@ -526,7 +539,7 @@ static int mlx5i_open(struct net_device *netdev) if (err) goto err_remove_fs_underlay_qp; - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index e19ba3fcd1b7..c87962cab921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -59,6 +59,8 @@ struct mlx5i_priv { char *mlx5e_priv[0]; }; +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); + /* Underlay QP create/destroy functions */ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b491b8f5fd6b..6e56fa769d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) goto err_unint_underlay_qp; } - err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]); if (err) { mlx5_core_warn(mdev, "create child tis failed, %d\n", err); goto err_remove_rx_uderlay_qp; @@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); goto err_clear_state_opened_flag; } - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); @@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .cleanup_rx = mlx5i_pkey_cleanup_rx, .enable = NULL, .disable = NULL, + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 959605559858..c5ef2ff26465 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -305,8 +305,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) !mlx5_sriov_is_enabled(dev1); #ifdef CONFIG_MLX5_ESWITCH - roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE; + roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; #endif if (roce_lag) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 8212bfd05733..e69766393990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include <linux/netdevice.h> +#include <net/nexthop.h> #include "lag.h" #include "lag_mp.h" #include "mlx5_core.h" @@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct fib_info *fi) { struct lag_mp *mp = &ldev->lag_mp; + struct fib_nh *fib_nh0, *fib_nh1; + unsigned int nhs; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle add/replace event */ - if (fi->fib_nhs == 1) { + nhs = fib_info_num_path(fi); + if (nhs == 1) { if (__mlx5_lag_is_active(ldev)) { - struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev; + struct fib_nh *nh = fib_info_nh(fi, 0); + struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); mlx5_lag_set_port_affinity(ldev, ++i); @@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, return; } - if (fi->fib_nhs != 2) + if (nhs != 2) return; /* Verify next hops are ports of the same hca */ - if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) && - !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) { + fib_nh0 = fib_info_nh(fi, 0); + fib_nh1 = fib_info_nh(fi, 1); + if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev && + fib_nh1->fib_nh_dev == ldev->pf[1].netdev) && + !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev && + fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) { mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); return; } @@ -174,7 +181,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, mlx5_lag_set_port_affinity(ldev, i); } } else if (event == FIB_EVENT_NH_ADD && - fi->fib_nhs == 2) { + fib_info_num_path(fi) == 2) { mlx5_lag_set_port_affinity(ldev, 0); } } @@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct mlx5_fib_event_work *fib_work; struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; + struct net_device *fib_dev; struct fib_info *fi; if (info->family != AF_INET) @@ -254,8 +262,13 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->fib_dev != ldev->pf[0].netdev && - fi->fib_dev != ldev->pf[1].netdev) { + if (fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (fib_dev != ldev->pf[0].netdev && + fib_dev != ldev->pf[1].netdev) { return NOTIFY_DONE; } fib_work = mlx5_lag_init_fib_work(ldev, event); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c new file mode 100644 index 000000000000..ea9ee88491e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include "mlx5_core.h" + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, + u32 *p_key_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 sz_bits = sz_bytes * BITS_PER_BYTE; + u8 general_obj_key_size; + u64 general_obj_types; + void *obj, *key_p; + int err; + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key); + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) + return -EINVAL; + + switch (sz_bits) { + case 128: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + break; + case 256: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; + break; + default: + return -EINVAL; + } + + memcpy(key_p, key, sz_bytes); + + MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); + MLX5_SET(encryption_key_obj, obj, key_type, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..3dfab91ae5f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,7 +7,6 @@ #include <linux/mlx5/eq.h> #include <linux/mlx5/cq.h> -#define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { @@ -36,8 +35,14 @@ struct mlx5_eq { struct mlx5_rsc_debug *dbg; }; +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; +}; + struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; @@ -70,7 +75,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); struct 
mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); void mlx5_cq_tasklet_cb(unsigned long data); @@ -92,7 +97,4 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif -int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); -int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); - #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 000000000000..23361a9ae4fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/kernel.h> +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. 
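+ * Callers that need a different class/type must first release + * all references via mlx5_geneve_tlv_option_del() so the FW + * object can be destroyed and later recreated with the new + * parameters.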
+ */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. + */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 000000000000..adee0cbba19c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include <net/geneve.h> +#include <linux/mlx5/driver.h> + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 397a2847867a..b99d469e4e64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +int mlx5_crdump_enable(struct mlx5_core_dev *dev); +void mlx5_crdump_disable(struct mlx5_core_dev *dev); +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); /* TODO move to lib/events.h */ @@ -76,4 +79,9 @@ struct mlx5_pme_stats { void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); +/* Crypto */ +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, u32 *p_key_id); +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index a71d5b9c7ab2..3118e8d66407 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) struct l2table_node { struct l2addr_node node; u32 index; /* index in HW l2 table */ + int ref_count; }; struct mlx5_mpfs { @@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; + int err = 0; u32 index; - int err; if (!MLX5_ESWITCH_MANAGER(dev)) return 0; @@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); if (l2addr) { - err = -EEXIST; - goto abort; + l2addr->ref_count++; + goto out; } err = alloc_l2table_index(mpfs, &index); if (err) - goto abort; + goto out; l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); if (!l2addr) { - free_l2table_index(mpfs, index); err = -ENOMEM; - goto abort; + goto hash_add_err; } - l2addr->index = index; err = set_l2table_entry_cmd(dev, index, mac); - if (err) { - l2addr_hash_del(l2addr); - free_l2table_index(mpfs, index); - } + if (err) + goto set_table_entry_err; + + l2addr->index = index; + 
l2addr->ref_count = 1; mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); -abort: + goto out; + +set_table_entry_err: + l2addr_hash_del(l2addr); +hash_add_err: + free_l2table_index(mpfs, index); +out: mutex_unlock(&mpfs->lock); return err; } @@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) goto unlock; } + if (--l2addr->ref_count > 0) + goto unlock; + index = l2addr->index; del_l2table_entry_cmd(dev, index); l2addr_hash_del(l2addr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c new file mode 100644 index 000000000000..6b774e0c2766 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/pci.h> +#include "mlx5_core.h" +#include "pci_vsc.h" + +#define MLX5_EXTRACT_C(source, offset, size) \ + ((((u32)(source)) >> (offset)) & MLX5_ONES32(size)) +#define MLX5_EXTRACT(src, start, len) \ + (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len)) +#define MLX5_ONES32(size) \ + ((size) ? (0xffffffff >> (32 - (size))) : 0) +#define MLX5_MASK32(offset, size) \ + (MLX5_ONES32(size) << (offset)) +#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \ + ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \ + ((rsrc1) & (~MLX5_MASK32((start), (len))))) +#define MLX5_MERGE(rsrc1, rsrc2, start, len) \ + (((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len)) +#define vsc_read(dev, offset, val) \ + pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define vsc_write(dev, offset, val) \ + pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define VSC_MAX_RETRIES 2048 + +enum { + VSC_CTRL_OFFSET = 0x4, + VSC_COUNTER_OFFSET = 0x8, + VSC_SEMAPHORE_OFFSET = 0xc, + VSC_ADDR_OFFSET = 0x10, + VSC_DATA_OFFSET = 0x14, + + VSC_FLAG_BIT_OFFS = 31, + VSC_FLAG_BIT_LEN = 1, + + VSC_SYND_BIT_OFFS = 30, + VSC_SYND_BIT_LEN = 1, + + VSC_ADDR_BIT_OFFS = 0, + VSC_ADDR_BIT_LEN = 30, + + VSC_SPACE_BIT_OFFS = 0, + VSC_SPACE_BIT_LEN = 16, + + VSC_SIZE_VLD_BIT_OFFS = 28, + VSC_SIZE_VLD_BIT_LEN = 1, + + VSC_STATUS_BIT_OFFS = 29, + VSC_STATUS_BIT_LEN = 3, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + dev->vsc_addr = pci_find_capability(dev->pdev, + PCI_CAP_ID_VNDR); + if (!dev->vsc_addr) + mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n"); +} + +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) +{ + u32 counter = 0; + int retries = 0; + u32 lock_val; + int ret; + + pci_cfg_access_lock(dev->pdev); + do { + if (retries > VSC_MAX_RETRIES) { + ret = -EBUSY; + goto pci_unlock; + } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + if (lock_val) { + retries++; + usleep_range(1000, 2000); + continue; + } + + /* Read and write counter value, if written value is + * the same, semaphore was acquired successfully. 
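+ * The counter register supplies a value unique to this + * contender (see mlx5_vsc_sem_set_space()), so reading back + * the value we wrote means no other agent raced us between + * the write and the read back.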
+ */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter); + if (ret) + goto pci_unlock; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter); + if (ret) + goto pci_unlock; + + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + retries++; + } while (counter != lock_val); + + return 0; + +pci_unlock: + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev) +{ + int ret; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK); + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size) +{ + int ret; + u32 val = 0; + + if (!mlx5_vsc_accessible(dev)) + return -EINVAL; + + if (ret_space_size) + *ret_space_size = 0; + + /* Get a unique val */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + /* Try to modify the lock */ + val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN); + ret = vsc_write(dev, VSC_CTRL_OFFSET, val); + if (ret) + goto out; + + /* Verify lock was modified */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0) + return -EINVAL; + + /* Get space max address if indicated by size valid bit */ + if (ret_space_size && + MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) { + ret = vsc_read(dev, VSC_ADDR_OFFSET, &val); + if (ret) { + mlx5_core_warn(dev, "Failed to get max space size\n"); + goto out; + } + *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + } + return 0; + +out: + return ret; +} + +static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val) +{ + int retries = 0; + u32 flag; + int ret; + + do { + if (retries > VSC_MAX_RETRIES) + return -EBUSY; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag); + if (ret) + return ret; + flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN); + retries++; + + if ((retries & 0xf) == 0) + usleep_range(1000, 2000); + + } while (flag != expected_val); + + return 0; +} + +static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address, + u32 data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + /* Set flag to 0x1 */ + address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1); + ret = vsc_write(dev, VSC_DATA_OFFSET, data); + if (ret) + goto out; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + /* Wait for the flag to be cleared */ + ret = mlx5_vsc_wait_on_flag(dev, 0); + +out: + return ret; +} + +static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address, + u32 *data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + ret = mlx5_vsc_wait_on_flag(dev, 1); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_DATA_OFFSET, data); +out: + return ret; +} + +static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev, + unsigned int read_addr, + unsigned int *next_read_addr, + u32 *data) +{ + int ret; + + ret = mlx5_vsc_gw_read(dev, read_addr, data); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr); + if (ret) + goto out; + + *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + + if (*next_read_addr <= read_addr) + ret = -EINVAL; +out: + return 
ret; +} + +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length) +{ + unsigned int next_read_addr = 0; + unsigned int read_addr = 0; + + while (read_addr < length) { + if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, + &data[(read_addr >> 2)])) + return read_addr; + + read_addr = next_read_addr; + } + return length; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state) +{ + u32 data, id = 0; + int ret; + + ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL); + if (ret) { + mlx5_core_warn(dev, "Failed to set gw space %d\n", ret); + return ret; + } + + if (state == MLX5_VSC_LOCK) { + /* Get a unique ID based on the counter */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id); + if (ret) + return ret; + } + + /* Try to modify lock */ + ret = mlx5_vsc_gw_write(dev, space, id); + if (ret) + return ret; + + /* Verify lock was modified */ + ret = mlx5_vsc_gw_read(dev, space, &data); + if (ret) + return -EINVAL; + + if (data != id) + return -EBUSY; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h new file mode 100644 index 000000000000..64272a6d7754 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_PCI_VSC_H__ +#define __MLX5_PCI_VSC_H__ + +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + +enum { + MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size); +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length); + +static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) +{ + return !!dev->vsc_addr; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state); + +#endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 23d53163ce15..b15b27a497fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -56,6 +56,7 @@ #include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" +#include "devlink.h" #include "lib/mlx5.h" #include "fpga/core.h" #include "fpga/ipsec.h" @@ -63,7 +64,9 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/geneve.h" #include "lib/devcom.h" +#include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" @@ -169,18 +172,28 @@ static struct mlx5_profile profile[] = { #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 10000 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 +#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 -static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) { + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; + BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); + while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; break; } + if 
(warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", + jiffies_to_msecs(end - warn) / 1000); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } msleep(FW_INIT_WAIT_MS); } @@ -721,8 +734,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, struct mlx5_priv *priv = &dev->priv; int err = 0; - priv->pci_dev_data = id->driver_data; - + mutex_init(&dev->pci_status_mutex); pci_set_drvdata(dev->pdev, dev); dev->bar_addr = pci_resource_start(pdev, 0); @@ -761,6 +773,8 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, goto err_clr_master; } + mlx5_pci_vsc_init(dev); + return 0; err_clr_master: @@ -794,10 +808,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_devcom; } + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + err = mlx5_eq_table_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); - goto err_devcom; + goto err_irq_cleanup; } err = mlx5_events_init(dev); @@ -821,6 +841,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_clock(dev); dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); err = mlx5_init_rl_table(dev); if (err) { @@ -834,37 +855,38 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); + err = mlx5_sriov_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + mlx5_core_err(dev, "Failed to init sriov %d\n", err); goto err_mpfs_cleanup; } - err = mlx5_sriov_init(dev); + err = mlx5_eswitch_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { mlx5_core_err(dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); @@ -873,6 +895,8 @@ err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -883,10 +907,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); @@ -895,6 +920,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } @@ -911,7 +937,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, 
FW_PRE_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); @@ -924,7 +950,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) return err; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); @@ -1028,6 +1054,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_events_start(dev); mlx5_pagealloc_start(dev); + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + err = mlx5_eq_table_create(dev); if (err) { mlx5_core_err(dev, "Failed to create EQs\n"); @@ -1099,6 +1131,8 @@ err_fpga_start: err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1115,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1183,7 +1218,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) int err = 0; if (cleanup) - mlx5_drain_health_recovery(dev); + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1210,17 +1245,6 @@ out: return err; } -static const struct devlink_ops mlx5_devlink_ops = { -#ifdef CONFIG_MLX5_ESWITCH - .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, - .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, - .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, - .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, - .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, - .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, -#endif -}; - static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) { struct mlx5_priv *priv = &dev->priv; @@ -1230,7 +1254,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); - mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); mutex_init(&priv->bfregs.reg_head.lock); @@ -1282,9 +1305,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) struct devlink *devlink; int err; - devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + devlink = mlx5_devlink_alloc(); if (!devlink) { - dev_err(&pdev->dev, "kzalloc failed\n"); + dev_err(&pdev->dev, "devlink alloc failed\n"); return -ENOMEM; } @@ -1292,6 +1315,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->device = &pdev->dev; dev->pdev = pdev; + dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? 
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF; + err = mlx5_mdev_init(dev, prof_sel); if (err) goto mdev_init_err; @@ -1312,10 +1338,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) request_module_nowait(MLX5_IB_MOD); - err = devlink_register(devlink, &pdev->dev); + err = mlx5_devlink_register(devlink, &pdev->dev); if (err) goto clean_load; + err = mlx5_crdump_enable(dev); + if (err) + dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + pci_save_state(pdev); return 0; @@ -1327,7 +1357,7 @@ err_load_one: pci_init_err: mlx5_mdev_uninit(dev); mdev_init_err: - devlink_free(devlink); + mlx5_devlink_free(devlink); return err; } @@ -1337,7 +1367,8 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); - devlink_unregister(devlink); + mlx5_crdump_disable(dev); + mlx5_devlink_unregister(devlink); mlx5_unregister_device(dev); if (mlx5_unload_one(dev, true)) { @@ -1348,7 +1379,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_pci_close(dev); mlx5_mdev_uninit(dev); - devlink_free(devlink); + mlx5_devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, @@ -1359,12 +1390,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_core_info(dev, "%s was called\n", __func__); mlx5_enter_error_state(dev, false); + mlx5_error_sw_reset(dev); mlx5_unload_one(dev, false); - /* In case of kernel call drain the health wq */ - if (state) { - mlx5_drain_health_wq(dev); - mlx5_pci_disable_device(dev); - } + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; @@ -1532,7 +1561,8 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { - mlx5_pci_err_detected(dev->pdev, 0); + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); } void mlx5_recover_device(struct mlx5_core_dev *dev) @@ -1570,7 +1600,7 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); - mlx5_fpga_ipsec_build_fs_cmds(); + mlx5_accel_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 22e69d4813e4..471bbc48bc1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -111,6 +111,11 @@ enum { MLX5_DRIVER_SYND = 0xbadd00de, }; +enum mlx5_semaphore_space_address { + MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, + MLX5_SEMAPHORE_SW_RESET = 0x20, +}; + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); @@ -118,6 +123,7 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_error_sw_reset(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); @@ -153,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct 
net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); + int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); @@ -184,7 +203,10 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) -int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, + struct netlink_ext_ack *extack); +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *stored_ver); void mlx5e_init(void); void mlx5e_cleanup(void); @@ -213,7 +235,7 @@ enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, MLX5_NIC_IFC_NO_DRAM_NIC = 2, - MLX5_NIC_IFC_INVALID = 3 + MLX5_NIC_IFC_SW_RESET = 7 }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ea744d8466ea..9231b39d18b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -38,15 +38,12 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; - - memset(table, 0, sizeof(*table)); - rwlock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); } void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { + WARN_ON(!xa_empty(&dev->priv.mkey_table)); } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_async_cbk_t callback, struct mlx5_async_work *context) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - /* connect to mkey tree */ - write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); - write_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", mlx5_base_mkey(mkey->key), err); mlx5_core_destroy_mkey(dev, mkey); } @@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 
in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - write_lock_irqsave(&table->lock, flags); - deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); - write_unlock_irqrestore(&table->lock, flags); + xa_lock_irqsave(mkeys, flags); + deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + xa_unlock_irqrestore(mkeys, flags); if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", mlx5_base_mkey(mkey->key)); return -ENOENT; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000000..373981a659c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif + +#define MLX5_MAX_IRQ_NAME (32) + +struct mlx5_irq { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_irq_table { + struct mlx5_irq *irq; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; +} + +static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + return &irq_table->irq[vecidx]; +} + +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_register(&irq->nh, nb); +} + +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_set_name(char *name, int vecidx) +{ + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; + } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + for (i = 0; i < nvec; i++) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name, + &irq->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct 
mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq->nh); + } + return err; +} + +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_IRQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* Completion IRQ vectors */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), + irq->mask); + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irqn, irq->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", + irqn); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irqn, NULL); + free_cpumask_var(irq->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int err; + int i; + + for (i = 0; i < nvec; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int i; + + for (i = 0; i < nvec; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq[vecidx].mask; +} + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} +#endif + +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_table *table = priv->irq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
+ MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_IRQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_IRQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); + if (!table->irq) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq; + } + + table->nvec = nvec; + + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + + err = request_irqs(dev, nvec); + if (err) + goto err_request_irqs; + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto err_set_affinity; + } + + return 0; + +err_set_affinity: + unrequest_irqs(dev); +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: + pci_free_irq_vectors(dev->pdev); +err_free_irq: + kfree(table->irq); + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); + pci_free_irq_vectors(dev->pdev); + kfree(table->irq); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 86f77456f873..17ce9dd56b13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -106,10 +106,10 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) return 0; -destroy_flow_table: - mlx5_destroy_flow_table(ft); destroy_flow_group: mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); free: kvfree(spec); kvfree(flow_group_in); @@ -126,7 +126,7 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * { u8 hw_id[ETH_ALEN]; - mlx5_query_nic_vport_mac_address(dev, 0, hw_id); + mlx5_query_mac_address(dev, hw_id); gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); addrconf_addr_eui48(&gid->raw[8], hw_id); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a249b3c3843d..61fcfd8b39b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -74,17 +74,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) int err; int vf; - if (sriov->enabled_vfs) { - mlx5_core_warn(dev, - "failed to enable SRIOV on device, already enabled with %d vfs\n", - sriov->enabled_vfs); - return -EBUSY; - } - if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs); + err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); @@ -99,7 +93,6 @@ enable_vfs_hca: continue; } sriov->vfs_ctx[vf].enabled = 1; - sriov->enabled_vfs++; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { err = sriov_restore_guids(dev, vf); if (err) { 
@@ -118,13 +111,11 @@ enable_vfs_hca: static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int num_vfs = pci_num_vf(dev->pdev); int err; int vf; - if (!sriov->enabled_vfs) - goto out; - - for (vf = 0; vf < sriov->num_vfs; vf++) { + for (vf = num_vfs - 1; vf >= 0; vf--) { if (!sriov->vfs_ctx[vf].enabled) continue; err = mlx5_core_disable_hca(dev, vf + 1); @@ -133,12 +124,10 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) continue; } sriov->vfs_ctx[vf].enabled = 0; - sriov->enabled_vfs--; } -out: if (MLX5_ESWITCH_MANAGER(dev)) - mlx5_eswitch_disable_sriov(dev->priv.eswitch); + mlx5_eswitch_disable(dev->priv.eswitch); if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); @@ -191,13 +180,11 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) int mlx5_sriov_attach(struct mlx5_core_dev *dev) { - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - - if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) return 0; /* If sriov VFs exist in PCI level, enable them in device level */ - return mlx5_device_enable_sriov(dev, sriov->num_vfs); + return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev)); } void mlx5_sriov_detach(struct mlx5_core_dev *dev) @@ -208,6 +195,30 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u16 host_total_vfs; + const u32 *out; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + out = mlx5_esw_query_functions(dev); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. + */ + if (IS_ERR(out)) + goto done; + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + kvfree(out); + if (host_total_vfs) + return host_total_vfs; + } + +done: + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -218,6 +229,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 95cdc8cbcba4..c912d82ca64b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -34,6 +34,7 @@ #include <linux/etherdevice.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> +#include <linux/mlx5/eswitch.h> #include "mlx5_core.h" /* Mutex to hold while enabling or disabling RoCE */ @@ -155,11 +156,12 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, } int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, - u16 vport, u8 *addr) + u16 vport, bool other, u8 *addr) { - u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; u8 *out_addr; + u32 *out; int err; out = kvzalloc(outlen, GFP_KERNEL); @@ -169,7 +171,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); - err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + 
MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, other); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); if (!err) ether_addr_copy(addr, &out_addr[2]); @@ -178,6 +185,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); +int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr); +} +EXPORT_SYMBOL_GPL(mlx5_query_mac_address); + int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { @@ -194,9 +207,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.permanent_address, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -291,9 +302,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) @@ -483,7 +492,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.node_guid, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -1157,3 +1166,17 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) return tmp; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); + +/** + * mlx5_eswitch_get_total_vports - Get total vports of the eswitch + * + * @dev: Pointer to core device + * + * mlx5_eswitch_get_total_vports returns total number of vports for + * the eswitch. 
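+ * The total is the number of special vports (PF, and ECPF when + * present) plus the maximum number of VFs the device supports, + * i.e. MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev).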
+ */ +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev); +} +EXPORT_SYMBOL(mlx5_eswitch_get_total_vports); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 1f87cce421e0..f1ec58c9e9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -134,11 +134,6 @@ static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) *wq->db = cpu_to_be32(wq->wqe_ctr); } -static inline u16 mlx5_wq_cyc_get_ctr_wrap_cnt(struct mlx5_wq_cyc *wq, u16 ctr) -{ - return ctr >> wq->fbc.log_sz; -} - static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) { return ctr & wq->fbc.sz_m1; diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index 14c0c62f8e73..c50e74ab02c4 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -5,6 +5,7 @@ #define _MLXFW_H #include <linux/firmware.h> +#include <linux/netlink.h> enum mlxfw_fsm_state { MLXFW_FSM_STATE_IDLE, @@ -57,6 +58,10 @@ struct mlxfw_dev_ops { void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + + void (*status_notify)(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes); }; struct mlxfw_dev { @@ -67,11 +72,13 @@ struct mlxfw_dev { #if IS_REACHABLE(CONFIG_MLXFW) int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware); + const struct firmware *firmware, + struct netlink_ext_ack *extack); #else static inline int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 240c027e5f07..67990406cba2 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -39,8 +39,19 @@ static const char * const mlxfw_fsm_state_err_str[] = { "unknown error" }; +static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + if (!mlxfw_dev->ops->status_notify) + return; + mlxfw_dev->ops->status_notify(mlxfw_dev, msg, comp_name, + done_bytes, total_bytes); +} + static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - enum mlxfw_fsm_state fsm_state) + enum mlxfw_fsm_state fsm_state, + struct netlink_ext_ack *extack) { enum mlxfw_fsm_state_err fsm_state_err; enum mlxfw_fsm_state curr_fsm_state; @@ -57,11 +68,13 @@ retry: if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { pr_err("Firmware flash failed: %s\n", mlxfw_fsm_state_err_str[fsm_state_err]); + NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed"); return -EINVAL; } if (curr_fsm_state != fsm_state) { if (--times == 0) { pr_err("Timeout reached on FSM state change"); + NL_SET_ERR_MSG_MOD(extack, "Timeout reached on FSM state change"); return -ETIMEDOUT; } msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS); @@ -76,16 +89,20 @@ retry: static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - struct mlxfw_mfa2_component *comp) + struct mlxfw_mfa2_component *comp, + struct netlink_ext_ack *extack) { u16 comp_max_write_size; u8 comp_align_bits; u32 comp_max_size; + char 
comp_name[8]; u16 block_size; u8 *block_ptr; u32 offset; int err; + sprintf(comp_name, "%u", comp->index); + err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index, &comp_max_size, &comp_align_bits, &comp_max_write_size); @@ -96,6 +113,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, if (comp->data_size > comp_max_size) { pr_err("Component %d is of size %d which is bigger than limit %d\n", comp->index, comp->data_size, comp_max_size); + NL_SET_ERR_MSG_MOD(extack, "Component is bigger than limit"); return -EINVAL; } @@ -103,6 +121,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, comp_align_bits); pr_debug("Component update\n"); + mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle, comp->index, comp->data_size); @@ -110,11 +129,13 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, return err; err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, - MLXFW_FSM_STATE_DOWNLOAD); + MLXFW_FSM_STATE_DOWNLOAD, extack); if (err) goto err_out; pr_debug("Component download\n"); + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, 0, comp->data_size); for (offset = 0; offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits); offset += comp_max_write_size) { @@ -126,15 +147,20 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, offset); if (err) goto err_out; + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, offset + block_size, + comp->data_size); } pr_debug("Component verify\n"); + mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle, comp->index); if (err) goto err_out; - err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_out; return 0; @@ -145,7 +171,8 @@ err_out: } static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - struct mlxfw_mfa2_file *mfa2_file) + struct mlxfw_mfa2_file *mfa2_file, + struct netlink_ext_ack *extack) { u32 component_count; int err; @@ -156,6 +183,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, &component_count); if (err) { pr_err("Could not find device PSID in MFA2 file\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not find device PSID in MFA2 file"); return err; } @@ -168,7 +196,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, return PTR_ERR(comp); pr_info("Flashing component type %d\n", comp->index); - err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp); + err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, extack); mlxfw_mfa2_file_component_put(comp); if (err) return err; @@ -177,7 +205,8 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, } int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlxfw_mfa2_file *mfa2_file; u32 fwhandle; @@ -185,6 +214,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, if (!mlxfw_mfa2_check(firmware)) { pr_err("Firmware file is not MFA2\n"); + NL_SET_ERR_MSG_MOD(extack, "Firmware file is not MFA2"); return -EINVAL; } @@ -193,29 +223,35 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, return PTR_ERR(mfa2_file); pr_info("Initialize firmware flash process\n"); + mlxfw_status_notify(mlxfw_dev, "Initializing firmware 
flash process", + NULL, 0, 0); err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle); if (err) { pr_err("Could not lock the firmware FSM\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not lock the firmware FSM"); goto err_fsm_lock; } err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, - MLXFW_FSM_STATE_LOCKED); + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_state_wait_idle_to_locked; - err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file); + err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, extack); if (err) goto err_flash_components; pr_debug("Activate image\n"); + mlxfw_status_notify(mlxfw_dev, "Activating image", NULL, 0, 0); err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle); if (err) { pr_err("Could not activate the downloaded image\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not activate the downloaded image"); goto err_fsm_activate; } - err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_state_wait_activate_to_locked; @@ -223,6 +259,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); pr_info("Firmware flash done.\n"); + mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0); mlxfw_mfa2_file_fini(mfa2_file); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 11ded0bc7d98..06c80343d9ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -83,6 +83,8 @@ config MLXSW_SPECTRUM select PARMAN select OBJAGG select MLXFW + imply PTP_1588_CLOCK + select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index c4dc72e1ce63..171b36bd8a4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -31,5 +31,6 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_nve.o spectrum_nve_vxlan.o \ spectrum_dpipe.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o +mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 0772e4339b33..5ffdfb532cb7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -317,6 +317,18 @@ MLXSW_ITEM64(cmd_mbox, query_fw, doorbell_page_offset, 0x40, 0, 64); */ MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2); +/* cmd_mbox_query_fw_free_running_clock_offset + * The offset of the free running clock page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, free_running_clock_offset, 0x50, 0, 64); + +/* cmd_mbox_query_fw_fr_rn_clk_bar + * PCI base address register (BAR) of the free running clock page + * 0: BAR 0 + * 1: 64 bit BAR + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fr_rn_clk_bar, 0x58, 30, 2); + /* QUERY_BOARDINFO - Query Board Information * ----------------------------------------- * OpMod == 0 (N/A), INMmod == 0 (N/A) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 6ee6de7f0160..17ceac7505e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1003,6 
+1003,20 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink, return err; } +static int mlxsw_devlink_flash_update(struct devlink *devlink, + const char *file_name, + const char *component, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->flash_update) + return -EOPNOTSUPP; + return mlxsw_driver->flash_update(mlxsw_core, file_name, + component, extack); +} + static const struct devlink_ops mlxsw_devlink_ops = { .reload = mlxsw_devlink_core_bus_device_reload, .port_type_set = mlxsw_devlink_port_type_set, @@ -1019,6 +1033,7 @@ static const struct devlink_ops mlxsw_devlink_ops = { .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, .info_get = mlxsw_devlink_info_get, + .flash_update = mlxsw_devlink_flash_update, }; static int @@ -1098,6 +1113,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_register_params; } + if (mlxsw_driver->init) { + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); + if (err) + goto err_driver_init; + } + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); if (err) goto err_hwmon_init; @@ -1107,22 +1128,17 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->init) { - err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); - if (err) - goto err_driver_init; - } - if (mlxsw_driver->params_register && !reload) devlink_params_publish(devlink); return 0; -err_driver_init: - mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); +err_driver_init: if (mlxsw_driver->params_unregister && !reload) mlxsw_driver->params_unregister(mlxsw_core); err_register_params: @@ -1187,10 +1203,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (mlxsw_core->driver->params_unregister && !reload) devlink_params_unpublish(devlink); - if (mlxsw_core->driver->fini) - mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); if (mlxsw_core->driver->params_unregister && !reload) mlxsw_core->driver->params_unregister(mlxsw_core); if (!reload) @@ -1229,6 +1245,15 @@ int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, } EXPORT_SYMBOL(mlxsw_core_skb_transmit); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + if (mlxsw_core->driver->ptp_transmitted) + mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb, + local_port); +} +EXPORT_SYMBOL(mlxsw_core_ptp_transmitted); + static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, const struct mlxsw_rx_listener *rxl_b) { @@ -2010,6 +2035,18 @@ int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, } EXPORT_SYMBOL(mlxsw_core_resources_query); +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_h(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_h); + +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_l(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_l); + static int __init 
mlxsw_core_module_init(void) { int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index e3832cb5bdda..8efcff4b59cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -48,6 +48,8 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); @@ -284,6 +286,9 @@ struct mlxsw_driver { unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + int (*flash_update)(struct mlxsw_core *mlxsw_core, + const char *file_name, const char *component, + struct netlink_ext_ack *extack); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); @@ -293,6 +298,13 @@ struct mlxsw_driver { u64 *p_linear_size); int (*params_register)(struct mlxsw_core *mlxsw_core); void (*params_unregister)(struct mlxsw_core *mlxsw_core); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); + u8 txhdr_len; const struct mlxsw_config_profile *profile; bool res_query_enabled; @@ -306,6 +318,9 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core); void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); + bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, enum mlxsw_res_id res_id); @@ -336,6 +351,8 @@ struct mlxsw_bus { char *in_mbox, size_t in_mbox_size, char *out_mbox, size_t out_mbox_size, u8 *p_status); + u32 (*read_frc_h)(void *bus_priv); + u32 (*read_frc_l)(void *bus_priv); u8 features; }; @@ -353,7 +370,8 @@ struct mlxsw_bus_info { struct mlxsw_fw_rev fw_rev; u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; - u8 low_frequency; + u8 low_frequency:1, + read_frc_capable:1; }; struct mlxsw_hwmon; @@ -409,4 +427,14 @@ enum mlxsw_devlink_param_id { MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, }; +struct mlxsw_skb_cb { + struct mlxsw_tx_info tx_info; +}; + +static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(mlxsw_skb_cb) > sizeof(skb->cb)); + return (struct mlxsw_skb_cb *) skb->cb; +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index cb3e663b1d37..feb4672a5ac0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -30,8 +30,9 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) elinst = &block->instances[j]; if (elinst->type != elinst->info->type || - elinst->item.size.bits != - elinst->info->item.size.bits) + (!elinst->avoid_size_check && + elinst->item.size.bits != + elinst->info->item.size.bits)) return false; } } @@ -385,12 +386,12 @@ EXPORT_SYMBOL(mlxsw_afk_values_add_buf); static void mlxsw_sp_afk_encode_u32(const 
struct mlxsw_item *storage_item, const struct mlxsw_item *output_item, - char *storage, char *output) + char *storage, char *output, int diff) { u32 value; value = __mlxsw_item_get32(storage, storage_item, 0); - __mlxsw_item_set32(output, output_item, 0, value); + __mlxsw_item_set32(output, output_item, 0, value + diff); } static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item, @@ -406,14 +407,14 @@ static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item, static void mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, - char *output, char *storage) + char *output, char *storage, int u32_diff) { const struct mlxsw_item *storage_item = &elinst->info->item; const struct mlxsw_item *output_item = &elinst->item; if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) mlxsw_sp_afk_encode_u32(storage_item, output_item, - storage, output); + storage, output, u32_diff); else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF) mlxsw_sp_afk_encode_buf(storage_item, output_item, storage, output); @@ -446,9 +447,10 @@ void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, continue; mlxsw_sp_afk_encode_one(elinst, block_key, - values->storage.key); + values->storage.key, + elinst->u32_key_diff); mlxsw_sp_afk_encode_one(elinst, block_mask, - values->storage.mask); + values->storage.mask, 0); } mlxsw_afk->ops->encode_block(key, i, block_key); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 4a625cdf3e7c..cb229b55ecc4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -74,7 +74,7 @@ struct mlxsw_afk_element_info { * define an internal storage geometry. */ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { - MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), @@ -107,9 +107,14 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ const struct mlxsw_afk_element_info *info; enum mlxsw_afk_element_type type; struct mlxsw_item item; /* element geometry in block */ + int u32_key_diff; /* in case value needs to be adjusted before write + * this diff is here to handle that + */ + bool avoid_size_check; }; -#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, _shift, _size) \ +#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, \ + _shift, _size, _u32_key_diff, _avoid_size_check) \ { \ .info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \ .type = _type, \ @@ -119,15 +124,24 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ .size = {.bits = _size}, \ .name = #_element, \ }, \ + .u32_key_diff = _u32_key_diff, \ + .avoid_size_check = _avoid_size_check, \ } #define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size) \ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ - _element, _offset, _shift, _size) + _element, _offset, _shift, _size, 0, false) + +#define MLXSW_AFK_ELEMENT_INST_EXT_U32(_element, _offset, \ + _shift, _size, _key_diff, \ + _avoid_size_check) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size, \ + _key_diff, _avoid_size_check) #define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size) \ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF, \ - _element, 
_offset, 0, _size) + _element, _offset, 0, _size, 0, false) struct mlxsw_afk_block { u16 encoding; /* block ID */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 72539a9a3847..d2c7ce67c300 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -92,33 +92,20 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, u16 temp; } temp_thresh; char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; - char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; - u16 module_temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int module_temp; bool qsfp; int err; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); - err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl); + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, + false, false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) return err; - - /* Don't read temperature thresholds for module with no valid info. */ - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL); - switch (module_temp) { - case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */ - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: + mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL); + if (!module_temp) { *temp = 0; return 0; - default: - /* Do not consider thresholds for zero temperature. */ - if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) { - *temp = 0; - return 0; - } - break; } /* Read Free Side Device Temperature Thresholds from page 03h diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 496dc904c5ed..5b00726c4346 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -23,6 +23,14 @@ struct mlxsw_hwmon_attr { char name[32]; }; +static int mlxsw_hwmon_get_attr_index(int index, int count) +{ + if (index >= count) + return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + + return index; +} + struct mlxsw_hwmon { struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -33,6 +41,7 @@ struct mlxsw_hwmon { struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; u8 sensor_count; + u8 module_sensor_count; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -43,18 +52,19 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp, index; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, - false, false); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, @@ -65,18 +75,19 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char 
mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp_max; + int temp_max, index; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, - false, false); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); - return sprintf(buf, "%u\n", temp_max); + return sprintf(buf, "%d\n", temp_max); } static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@ -88,6 +99,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; unsigned long val; + int index; int err; err = kstrtoul(buf, 10, &val); @@ -96,7 +108,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, if (val != 1) return -EINVAL; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n"); @@ -198,40 +212,20 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; - u16 temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 module; + int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); - err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); - if (err) { - dev_err(dev, "Failed to query module temperature sensor\n"); + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) return err; - } - - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); - /* Update status and temperature cache. */ - switch (temp) { - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: - temp = 0; - break; - case MLXSW_REG_MTBR_BAD_SENS_INFO: - /* Untrusted cable is connected. Reading temperature from its - * sensor is faulty. 
- */ - temp = 0; - break; - default: - temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); - break; - } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, @@ -333,6 +327,20 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, mlwsw_hwmon_attr->type_index); } +static ssize_t +mlxsw_hwmon_gbox_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + int index = mlwsw_hwmon_attr->type_index - + mlxsw_hwmon->module_sensor_count + 1; + + return sprintf(buf, "gearbox %03u\n", index); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, @@ -345,6 +353,7 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -428,6 +437,13 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "temp%u_label", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_gbox_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; default: WARN_ON(1); } @@ -556,6 +572,54 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) index, index); index++; } + mlxsw_hwmon->module_sensor_count = index; + + return 0; +} + +static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + int index, max_index, sensor_index; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 gbox_num; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL); + if (!gbox_num) + return 0; + + index = mlxsw_hwmon->module_sensor_count; + max_index = mlxsw_hwmon->module_sensor_count + gbox_num; + while (index < max_index) { + sensor_index = index % mlxsw_hwmon->module_sensor_count + + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + sensor_index); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, + index, index); + index++; + } return 0; } @@ -586,6 +650,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, if (err) goto err_temp_module_init; + err = mlxsw_hwmon_gearbox_init(mlxsw_hwmon); + if (err) + goto err_temp_gearbox_init; + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; @@ -602,6 +670,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; err_hwmon_register: 
+err_temp_gearbox_init: err_temp_module_init: err_fans_init: err_temp_init: diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index d3e851e7ca72..35a1dc89c28a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -23,6 +23,7 @@ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 +#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values @@ -98,7 +99,7 @@ struct mlxsw_thermal_module { struct thermal_zone_device *tzdev; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; - int module; + int module; /* Module or gearbox number */ }; struct mlxsw_thermal { @@ -111,6 +112,10 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + struct mlxsw_thermal_module *tz_gearbox_arr; + u8 tz_gearbox_num; + unsigned int tz_highest_score; + struct thermal_zone_device *tz_highest_dev; }; static inline u8 mlxsw_state_to_duty(int state) @@ -195,6 +200,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, return 0; } +static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, + struct thermal_zone_device *tzdev, + struct mlxsw_thermal_trip *trips, + int temp) +{ + struct mlxsw_thermal_trip *trip = trips; + unsigned int score, delta, i, shift = 1; + + /* Calculate thermal zone score, if temperature is above the critical + * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. 
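+ * Otherwise the score is delta * shift: delta grows as the reading
+ * approaches the next trip point, and shift gains a factor of 256 for
+ * every trip point already crossed, so a zone that passed a higher
+ * trip always outscores one that did not.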
+ */ + score = MLXSW_THERMAL_TEMP_SCORE_MAX; + for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS; + i++, trip++) { + if (temp < trip->temp) { + delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp); + score = delta * shift; + break; + } + shift *= 256; + } + + if (score > thermal->tz_highest_score) { + thermal->tz_highest_score = score; + thermal->tz_highest_dev = tzdev; + } +} + static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -279,7 +312,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp; int err; mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false); @@ -290,8 +323,11 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, + temp); - *p_temp = (int) temp; + *p_temp = temp; return 0; } @@ -351,6 +387,22 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, return 0; } +static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, + int trip, enum thermal_trend *trend) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + if (tzdev == thermal->tz_highest_dev) + return 1; + + *trend = THERMAL_TREND_STABLE; + return 0; +} + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -362,6 +414,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .set_trip_temp = mlxsw_thermal_set_trip_temp, .get_trip_hyst = mlxsw_thermal_get_trip_hyst, .set_trip_hyst = mlxsw_thermal_set_trip_hyst, + .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, @@ -449,39 +502,33 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, struct mlxsw_thermal_module *tz = tzdev->devdata; struct mlxsw_thermal *thermal = tz->parent; struct device *dev = thermal->bus_info->dev; - char mtbr_pl[MLXSW_REG_MTBR_LEN]; - u16 temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int temp; int err; /* Read module temperature. */ - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + - tz->module, 1); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl); - if (err) - return err; - - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); - /* Update temperature. */ - switch (temp) { - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */ - case MLXSW_REG_MTBR_BAD_SENS_INFO: + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + + tz->module, false, false); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + /* Do not return error - in case of broken module's sensor + * it will cause error message flooding. + */ temp = 0; - break; - default: - temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); - /* Reset all trip point. */ - mlxsw_thermal_module_trips_reset(tz); - /* Update trip points. 
*/ - err = mlxsw_thermal_module_trips_update(dev, thermal->core, - tz); - if (err) - return err; - break; + *p_temp = (int) temp; + return 0; } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + *p_temp = temp; + + if (!temp) + return 0; + + /* Update trip points. */ + err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + if (!err && temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); - *p_temp = (int) temp; return 0; } @@ -545,10 +592,6 @@ mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, return 0; } -static struct thermal_zone_params mlxsw_thermal_module_params = { - .governor_name = "user_space", -}; - static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .bind = mlxsw_thermal_module_bind, .unbind = mlxsw_thermal_module_unbind, @@ -560,6 +603,46 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .set_trip_temp = mlxsw_thermal_module_trip_temp_set, .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, +}; + +static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u16 index; + int temp; + int err; + + index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module; + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); + + *p_temp = temp; + return 0; +} + +static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_mode = mlxsw_thermal_module_mode_get, + .set_mode = mlxsw_thermal_module_mode_set, + .get_temp = mlxsw_thermal_gearbox_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, @@ -675,13 +758,13 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - &mlxsw_thermal_module_params, - 0, 0); + NULL, 0, 0); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); return err; } + module_tz->mode = THERMAL_DEVICE_ENABLED; return 0; } @@ -787,6 +870,92 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) kfree(thermal->tz_module_arr); } +static int +mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) +{ + char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME]; + + snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d", + gearbox_tz->module + 1); + gearbox_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + gearbox_tz, + &mlxsw_thermal_gearbox_ops, + NULL, 0, 0); + if (IS_ERR(gearbox_tz->tzdev)) + return PTR_ERR(gearbox_tz->tzdev); + + gearbox_tz->mode = THERMAL_DEVICE_ENABLED; + return 0; +} + +static void +mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz) +{ + 
thermal_zone_device_unregister(gearbox_tz->tzdev); +} + +static int +mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal) +{ + struct mlxsw_thermal_module *gearbox_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + int i; + int err; + + if (!mlxsw_core_res_query_enabled(core)) + return 0; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); + if (!thermal->tz_gearbox_num) + return 0; + + thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num, + sizeof(*thermal->tz_gearbox_arr), + GFP_KERNEL); + if (!thermal->tz_gearbox_arr) + return -ENOMEM; + + for (i = 0; i < thermal->tz_gearbox_num; i++) { + gearbox_tz = &thermal->tz_gearbox_arr[i]; + memcpy(gearbox_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + gearbox_tz->module = i; + gearbox_tz->parent = thermal; + err = mlxsw_thermal_gearbox_tz_init(gearbox_tz); + if (err) + goto err_unreg_tz_gearbox; + } + + return 0; + +err_unreg_tz_gearbox: + for (i--; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); + return err; +} + +static void +mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal) +{ + int i; + + if (!mlxsw_core_res_query_enabled(thermal->core)) + return; + + for (i = thermal->tz_gearbox_num - 1; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); +} + int mlxsw_thermal_init(struct mlxsw_core *core, const struct mlxsw_bus_info *bus_info, struct mlxsw_thermal **p_thermal) @@ -877,10 +1046,16 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (err) goto err_unreg_tzdev; + err = mlxsw_thermal_gearboxes_init(dev, core, thermal); + if (err) + goto err_unreg_modules_tzdev; + thermal->mode = THERMAL_DEVICE_ENABLED; *p_thermal = thermal; return 0; +err_unreg_modules_tzdev: + mlxsw_thermal_modules_fini(thermal); err_unreg_tzdev: if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); @@ -899,6 +1074,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) { int i; + mlxsw_thermal_gearboxes_fini(thermal); mlxsw_thermal_modules_fini(thermal); if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 06aea1999518..95f408d0e103 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -43,11 +43,10 @@ #define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28) #define MLXSW_I2C_MBOX_SIZE 20 #define MLXSW_I2C_MBOX_OUT_PARAM_OFF 12 -#define MLXSW_I2C_MAX_BUFF_SIZE 32 #define MLXSW_I2C_MBOX_OFFSET_BITS 20 #define MLXSW_I2C_MBOX_SIZE_BITS 12 #define MLXSW_I2C_ADDR_BUF_SIZE 4 -#define MLXSW_I2C_BLK_MAX 32 +#define MLXSW_I2C_BLK_DEF 32 #define MLXSW_I2C_RETRY 5 #define MLXSW_I2C_TIMEOUT_MSECS 5000 #define MLXSW_I2C_MAX_DATA_SIZE 256 @@ -62,6 +61,7 @@ * @dev: I2C device; * @core: switch core pointer; * @bus_info: bus info block; + * @block_size: maximum block size allowed to pass to under layer; */ struct mlxsw_i2c { struct { @@ -74,6 +74,7 @@ struct mlxsw_i2c { struct device *dev; struct mlxsw_core *core; struct mlxsw_bus_info bus_info; + u16 block_size; }; #define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \ @@ -315,20 +316,26 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, struct i2c_client *client = 
to_i2c_client(dev); struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client); unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS); - u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE]; int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j; unsigned long end; + u8 *tran_buf; struct i2c_msg write_tran = - MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE); + MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE); int err; + tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE, + GFP_KERNEL); + if (!tran_buf) + return -ENOMEM; + + write_tran.buf = tran_buf; for (i = 0; i < num; i++) { - chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ? - MLXSW_I2C_BLK_MAX : in_mbox_size; + chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ? + mlxsw_i2c->block_size : in_mbox_size; write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size; mlxsw_i2c_set_slave_addr(tran_buf, off); memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox + - MLXSW_I2C_BLK_MAX * i, chunk_size); + mlxsw_i2c->block_size * i, chunk_size); j = 0; end = jiffies + timeout; @@ -342,9 +349,10 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, (j++ < MLXSW_I2C_RETRY)); if (err != 1) { - if (!err) + if (!err) { err = -EIO; - return err; + goto mlxsw_i2c_write_exit; + } } off += chunk_size; @@ -355,24 +363,27 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0); if (err) { dev_err(&client->dev, "Could not start transaction"); - return -EIO; + err = -EIO; + goto mlxsw_i2c_write_exit; } /* Wait until go bit is cleared. */ err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status); if (err) { dev_err(&client->dev, "HW semaphore is not released"); - return err; + goto mlxsw_i2c_write_exit; } /* Validate transaction completion status. */ if (*p_status) { dev_err(&client->dev, "Bad transaction completion status %x\n", *p_status); - return -EIO; + err = -EIO; } - return 0; +mlxsw_i2c_write_exit: + kfree(tran_buf); + return err; } /* Routine executes I2C command. */ @@ -395,8 +406,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, if (in_mbox) { reg_size = mlxsw_i2c_get_reg_size(in_mbox); - num = reg_size / MLXSW_I2C_BLK_MAX; - if (reg_size % MLXSW_I2C_BLK_MAX) + num = reg_size / mlxsw_i2c->block_size; + if (reg_size % mlxsw_i2c->block_size) num++; if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { @@ -416,7 +427,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, } else { /* No input mailbox is case of initialization query command. */ reg_size = MLXSW_I2C_MAX_DATA_SIZE; - num = reg_size / MLXSW_I2C_BLK_MAX; + num = reg_size / mlxsw_i2c->block_size; if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { dev_err(&client->dev, "Could not acquire lock"); @@ -432,8 +443,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, /* Send read transaction to get output mailbox content. */ read_tran[1].buf = out_mbox; for (i = 0; i < num; i++) { - chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ? - MLXSW_I2C_BLK_MAX : reg_size; + chunk_size = (reg_size > mlxsw_i2c->block_size) ? 
+ mlxsw_i2c->block_size : reg_size; read_tran[1].len = chunk_size; mlxsw_i2c_set_slave_addr(tran_buf, off); @@ -509,8 +520,20 @@ mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (!mbox) return -ENOMEM; + err = mlxsw_cmd_query_fw(mlxsw_core, mbox); + if (err) + goto mbox_put; + + mlxsw_i2c->bus_info.fw_rev.major = + mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox); + mlxsw_i2c->bus_info.fw_rev.minor = + mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox); + mlxsw_i2c->bus_info.fw_rev.subminor = + mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox); + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); +mbox_put: mlxsw_cmd_mbox_free(mbox); return err; } @@ -534,6 +557,7 @@ static const struct mlxsw_bus mlxsw_i2c_bus = { static int mlxsw_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { + const struct i2c_adapter_quirks *quirks = client->adapter->quirks; struct mlxsw_i2c *mlxsw_i2c; u8 status; int err; @@ -542,6 +566,22 @@ static int mlxsw_i2c_probe(struct i2c_client *client, if (!mlxsw_i2c) return -ENOMEM; + if (quirks) { + if ((quirks->max_read_len && + quirks->max_read_len < MLXSW_I2C_BLK_DEF) || + (quirks->max_write_len && + quirks->max_write_len < MLXSW_I2C_BLK_DEF)) { + dev_err(&client->dev, "Insufficient transaction buffer length\n"); + return -EOPNOTSUPP; + } + + mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF, + min_t(u16, quirks->max_read_len, + quirks->max_write_len)); + } else { + mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF; + } + i2c_set_clientdata(client, mlxsw_i2c); mutex_init(&mlxsw_i2c->cmd.lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index cf2114273b72..471b0ca6d69a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -67,6 +67,23 @@ static const struct net_device_ops mlxsw_m_port_netdev_ops = { .ndo_get_devlink_port = mlxsw_m_port_get_devlink_port, }; +static void mlxsw_m_module_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + strlcpy(drvinfo->driver, mlxsw_m->bus_info->device_kind, + sizeof(drvinfo->driver)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + mlxsw_m->bus_info->fw_rev.major, + mlxsw_m->bus_info->fw_rev.minor, + mlxsw_m->bus_info->fw_rev.subminor); + strlcpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name, + sizeof(drvinfo->bus_info)); +} + static int mlxsw_m_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { @@ -88,6 +105,7 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, } static const struct ethtool_ops mlxsw_m_port_ethtool_ops = { + .get_drvinfo = mlxsw_m_module_get_drvinfo, .get_module_info = mlxsw_m_get_module_info, .get_module_eeprom = mlxsw_m_get_module_eeprom, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index b40455f8293d..051b19388a81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -102,6 +102,7 @@ struct mlxsw_pci_queue_type_group { struct mlxsw_pci { struct pci_dev *pdev; u8 __iomem *hw_addr; + u64 free_running_clock_offset; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 doorbell_offset; struct mlxsw_core *core; @@ -507,17 +508,28 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, { struct pci_dev *pdev 
= mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; + struct mlxsw_tx_info tx_info; char *wqe; struct sk_buff *skb; int i; spin_lock(&q->lock); elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info; skb = elem_info->u.sdq.skb; wqe = elem_info->elem; for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); - dev_kfree_skb_any(skb); + + if (unlikely(!tx_info.is_emad && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb, + tx_info.local_port); + skb = NULL; + } + + if (skb) + dev_kfree_skb_any(skb); elem_info->u.sdq.skb = NULL; if (q->consumer_counter++ != consumer_counter_limit) @@ -1414,6 +1426,15 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, mlxsw_pci->doorbell_offset = mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox); + if (mlxsw_cmd_mbox_query_fw_fr_rn_clk_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported free running clock BAR queried from hw\n"); + err = -EINVAL; + goto err_fr_rn_clk_bar; + } + + mlxsw_pci->free_running_clock_offset = + mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox); + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); if (err) @@ -1469,6 +1490,7 @@ err_query_resources: err_boardinfo: mlxsw_pci_fw_area_fini(mlxsw_pci); err_fw_area_init: +err_fr_rn_clk_bar: err_doorbell_page_bar: err_iface_rev: err_query_fw: @@ -1537,6 +1559,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, err = -EAGAIN; goto unlock; } + mlxsw_skb_cb(skb)->tx_info = *tx_info; elem_info->u.sdq.skb = skb; wqe = elem_info->elem; @@ -1560,6 +1583,9 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, goto unmap_frags; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + /* Set unused sq entries byte count to zero. 
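 * Only the fragments mapped above carry data; the byte count of the
 * remaining scatter/gather slots is cleared so the device treats them
 * as empty.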
*/ for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); @@ -1672,6 +1698,24 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, return err; } +static u32 mlxsw_pci_read_frc_h(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset; + + frc_offset = mlxsw_pci->free_running_clock_offset; + return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_H(frc_offset)); +} + +static u32 mlxsw_pci_read_frc_l(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset; + + frc_offset = mlxsw_pci->free_running_clock_offset; + return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_L(frc_offset)); +} + static const struct mlxsw_bus mlxsw_pci_bus = { .kind = "pci", .init = mlxsw_pci_init, @@ -1679,6 +1723,8 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .skb_transmit_busy = mlxsw_pci_skb_transmit_busy, .skb_transmit = mlxsw_pci_skb_transmit, .cmd_exec = mlxsw_pci_cmd_exec, + .read_frc_h = mlxsw_pci_read_frc_h, + .read_frc_l = mlxsw_pci_read_frc_l, .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, }; @@ -1740,6 +1786,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->bus_info.device_kind = driver_name; mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); mlxsw_pci->bus_info.dev = &pdev->dev; + mlxsw_pci->bus_info.read_frc_capable = true; mlxsw_pci->id = id; err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 8648ca171254..e57e42e2d2b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -43,6 +43,9 @@ #define MLXSW_PCI_DOORBELL(offset, type_offset, num) \ ((offset) + (type_offset) + (num) * 4) +#define MLXSW_PCI_FREE_RUNNING_CLOCK_H(offset) (offset) +#define MLXSW_PCI_FREE_RUNNING_CLOCK_L(offset) ((offset) + 4) + #define MLXSW_PCI_CQS_MAX 96 #define MLXSW_PCI_EQS_COUNT 2 #define MLXSW_PCI_EQ_ASYNC_NUM 0 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 7ed63ed657c7..ead36702549a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3515,6 +3515,18 @@ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); */ MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); +/* reg_qeec_ptps + * PTP shaper + * 0: regular shaper mode + * 1: PTP oriented shaper + * Allowed only for hierarchy 0 + * Not supported for CPU port + * Note that ptps mode may affect the shaper rates of all hierarchies + * Supported only on Spectrum-1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, ptps, 0x0C, 29, 1); + enum { MLXSW_REG_QEEC_BYTES_MODE, MLXSW_REG_QEEC_PACKETS_MODE, @@ -3601,6 +3613,16 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, mlxsw_reg_qeec_next_element_index_set(payload, next_index); } +static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, + bool ptps) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, + MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_ptps_set(payload, ptps); +} + /* QRWE - QoS ReWrite Enable * ------------------------- * This register configures the rewrite enable per receive port. 
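The QPSC hunk below programs a token-bucket shaper: every update interval of 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32 ns, the device adds shaper_inc tokens of 8 bytes (64 bits each) to the bucket. A minimal sketch of that arithmetic, using the kernel's math helpers; the helper name is hypothetical and not part of this patch:

#include <linux/math64.h>	/* div64_u64 */
#include <linux/time64.h>	/* NSEC_PER_SEC */
#include <linux/types.h>

/* Hypothetical helper: the QPSC shaper rate in bits per second, per
 * the reg_qpsc_shaper_time_* and reg_qpsc_shaper_inc comments below.
 */
static u64 qpsc_shaper_rate_bps(u8 time_exp, u8 time_mantissa, u8 inc)
{
	/* Base update interval: 2^exp * (1 + mantissa) * 32 ns. */
	u64 update_ns = (1ULL << time_exp) * (1 + time_mantissa) * 32;

	/* shaper_inc counts 8-byte tokens, i.e. 64 bits per token. */
	return div64_u64((u64)inc * 64 * NSEC_PER_SEC, update_ns);
}

With time_exp = 0, time_mantissa = 0 and inc = 1 this evaluates to 64 bits every 32 ns, i.e. 2 Gbit/s.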
@@ -3814,6 +3836,112 @@ mlxsw_reg_qtctm_pack(char *payload, u8 local_port, bool mc) mlxsw_reg_qtctm_mc_set(payload, mc); } +/* QPSC - QoS PTP Shaper Configuration Register + * -------------------------------------------- + * The QPSC allows advanced configuration of the shapers when QEEC.ptps=1. + * Supported only on Spectrum-1. + */ +#define MLXSW_REG_QPSC_ID 0x401B +#define MLXSW_REG_QPSC_LEN 0x28 + +MLXSW_REG_DEFINE(qpsc, MLXSW_REG_QPSC_ID, MLXSW_REG_QPSC_LEN); + +enum mlxsw_reg_qpsc_port_speed { + MLXSW_REG_QPSC_PORT_SPEED_100M, + MLXSW_REG_QPSC_PORT_SPEED_1G, + MLXSW_REG_QPSC_PORT_SPEED_10G, + MLXSW_REG_QPSC_PORT_SPEED_25G, +}; + +/* reg_qpsc_port_speed + * Port speed. + * Access: Index + */ +MLXSW_ITEM32(reg, qpsc, port_speed, 0x00, 0, 4); + +/* reg_qpsc_shaper_time_exp + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_exp, 0x04, 16, 4); + +/* reg_qpsc_shaper_time_mantissa + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_mantissa, 0x04, 0, 5); + +/* reg_qpsc_shaper_inc + * Number of tokens added to shaper on each update. + * Units of 8B. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_inc, 0x08, 0, 5); + +/* reg_qpsc_shaper_bs + * Max shaper Burst size. + * Burst size is 2 ^ max_shaper_bs * 512 [bits] + * Range is: 5..25 (from 2KB..2GB) + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_bs, 0x0C, 0, 6); + +/* reg_qpsc_ptsc_we + * Write enable to port_to_shaper_credits. + * Access: WO + */ +MLXSW_ITEM32(reg, qpsc, ptsc_we, 0x10, 31, 1); + +/* reg_qpsc_port_to_shaper_credits + * For split ports: range 1..57 + * For non-split ports: range 1..112 + * Written only when ptsc_we is set. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, port_to_shaper_credits, 0x10, 0, 8); + +/* reg_qpsc_ing_timestamp_inc + * Ingress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at ingress will be incremented by this value. Global + * value for all ports. + * Same units as used by MTPPTR. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, ing_timestamp_inc, 0x20, 0, 32); + +/* reg_qpsc_egr_timestamp_inc + * Egress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at egress will be incremented by this value. Global + * value for all ports. + * Same units as used by MTPPTR. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, egr_timestamp_inc, 0x24, 0, 32); + +static inline void +mlxsw_reg_qpsc_pack(char *payload, enum mlxsw_reg_qpsc_port_speed port_speed, + u8 shaper_time_exp, u8 shaper_time_mantissa, u8 shaper_inc, + u8 shaper_bs, u8 port_to_shaper_credits, + int ing_timestamp_inc, int egr_timestamp_inc) +{ + MLXSW_REG_ZERO(qpsc, payload); + mlxsw_reg_qpsc_port_speed_set(payload, port_speed); + mlxsw_reg_qpsc_shaper_time_exp_set(payload, shaper_time_exp); + mlxsw_reg_qpsc_shaper_time_mantissa_set(payload, shaper_time_mantissa); + mlxsw_reg_qpsc_shaper_inc_set(payload, shaper_inc); + mlxsw_reg_qpsc_shaper_bs_set(payload, shaper_bs); + mlxsw_reg_qpsc_ptsc_we_set(payload, true); + mlxsw_reg_qpsc_port_to_shaper_credits_set(payload, port_to_shaper_credits); + mlxsw_reg_qpsc_ing_timestamp_inc_set(payload, ing_timestamp_inc); + mlxsw_reg_qpsc_egr_timestamp_inc_set(payload, egr_timestamp_inc); +} + /* PMLP - Ports Module to Local Port Register * ------------------------------------------ * Configures the assignment of modules to local ports. @@ -5292,6 +5420,8 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, }; /* reg_htgt_trap_group @@ -8039,16 +8169,21 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN); +#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64 +#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256 /* reg_mtmp_sensor_index * Sensors index to access. * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially * (module 0 is mapped to sensor_index 64). * Access: Index */ -MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7); +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12); /* Convert to milli degrees Celsius */ -#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ({ typeof(val) v_ = (val); \ + ((v_) >= 0) ? ((v_) * 125) : \ + ((s16)((GENMASK(15, 0) + (v_) + 1) \ + * 125)); }) /* reg_mtmp_temperature * Temperature reading from the sensor. Reading is in 0.125 Celsius @@ -8107,7 +8242,7 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16); */ MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); -static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, +static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index, bool max_temp_enable, bool max_temp_reset) { @@ -8119,11 +8254,10 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, MLXSW_REG_MTMP_THRESH_HI); } -static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, - unsigned int *p_max_temp, - char *sensor_name) +static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, + int *p_max_temp, char *sensor_name) { - u16 temp; + s16 temp; if (p_temp) { temp = mlxsw_reg_mtmp_temperature_get(payload); @@ -8156,7 +8290,7 @@ MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); * 64-127 are mapped to the SFP+/QSFP modules sequentially). 
 * Access: Index
 */
-MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12);
 
 /* reg_mtbr_num_rec
  * Request: Number of records to read
@@ -8183,7 +8317,7 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
 MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
 		     MLXSW_REG_MTBR_REC_LEN, 0x00, false);
 
-static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index,
+static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index,
 				       u8 num_rec)
 {
 	MLXSW_REG_ZERO(mtbr, payload);
@@ -8689,6 +8823,107 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port,
 			       MLXSW_REG_MLCR_DURATION_MAX : 0);
 }
 
+/* MTPPS - Management Pulse Per Second Register
+ * --------------------------------------------
+ * This register provides the device PPS capabilities, configures the PPS in
+ * and out modules, and holds the PPS in time stamp.
+ */
+#define MLXSW_REG_MTPPS_ID 0x9053
+#define MLXSW_REG_MTPPS_LEN 0x3C
+
+MLXSW_REG_DEFINE(mtpps, MLXSW_REG_MTPPS_ID, MLXSW_REG_MTPPS_LEN);
+
+/* reg_mtpps_enable
+ * Enables the PPS functionality for the specific pin.
+ * A boolean variable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, enable, 0x20, 31, 1);
+
+enum mlxsw_reg_mtpps_pin_mode {
+	MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN = 0x2,
+};
+
+/* reg_mtpps_pin_mode
+ * Pin mode to be used. The mode must comply with the supported modes of the
+ * requested pin.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, pin_mode, 0x20, 8, 4);
+
+#define MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN 7
+
+/* reg_mtpps_pin
+ * Pin to be configured or queried out of the supported pins.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpps, pin, 0x20, 0, 8);
+
+/* reg_mtpps_time_stamp
+ * When pin_mode = pps_in, the latched device time when it was triggered from
+ * the external GPIO pin.
+ * When pin_mode = pps_out or virtual_pin or pps_out_and_virtual_pin, the
+ * target time to generate the next output signal.
+ * Time is in units of device clock.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mtpps, time_stamp, 0x28, 0, 64);
+
+static inline void
+mlxsw_reg_mtpps_vpin_pack(char *payload, u64 time_stamp)
+{
+	MLXSW_REG_ZERO(mtpps, payload);
+	mlxsw_reg_mtpps_pin_set(payload, MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN);
+	mlxsw_reg_mtpps_pin_mode_set(payload,
+				     MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN);
+	mlxsw_reg_mtpps_enable_set(payload, true);
+	mlxsw_reg_mtpps_time_stamp_set(payload, time_stamp);
+}
+
+/* MTUTC - Management UTC Register
+ * -------------------------------
+ * Configures the HW UTC counter.
+ */
+#define MLXSW_REG_MTUTC_ID 0x9055
+#define MLXSW_REG_MTUTC_LEN 0x1C
+
+MLXSW_REG_DEFINE(mtutc, MLXSW_REG_MTUTC_ID, MLXSW_REG_MTUTC_LEN);
+
+enum mlxsw_reg_mtutc_operation {
+	MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC = 0,
+	MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ = 3,
+};
+
+/* reg_mtutc_operation
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtutc, operation, 0x00, 0, 4);
+
+/* reg_mtutc_freq_adjustment
+ * Frequency adjustment: Every PPS the HW frequency will be
+ * adjusted by this value. Units of HW clock, where HW counts
+ * 10^9 HW clocks for 1 HW second.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtutc, freq_adjustment, 0x04, 0, 32);
+
+/* reg_mtutc_utc_sec
+ * UTC seconds.
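An aside on the MTPPS virtual pin defined above: arming it is a pack-and-write pair. A sketch that mirrors what mlxsw_sp1_ptp_phc_settime() in the new spectrum_ptp.c does further down (helper name hypothetical, mlxsw core/reg headers assumed):

/* Schedule the next virtual-pin event at the given device time
 * (units of device clock, as the time_stamp field requires).
 */
static int ptp_arm_virtual_pin(struct mlxsw_core *mlxsw_core, u64 device_time)
{
	char mtpps_pl[MLXSW_REG_MTPPS_LEN];

	mlxsw_reg_mtpps_vpin_pack(mtpps_pl, device_time);
	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl);
}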
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mtutc, utc_sec, 0x10, 0, 32);
+
+static inline void
+mlxsw_reg_mtutc_pack(char *payload, enum mlxsw_reg_mtutc_operation oper,
+		     u32 freq_adj, u32 utc_sec)
+{
+	MLXSW_REG_ZERO(mtutc, payload);
+	mlxsw_reg_mtutc_operation_set(payload, oper);
+	mlxsw_reg_mtutc_freq_adjustment_set(payload, freq_adj);
+	mlxsw_reg_mtutc_utc_sec_set(payload, utc_sec);
+}
+
 /* MCQI - Management Component Query Information
  * ---------------------------------------------
  * This register allows querying information about firmware components.
@@ -9043,6 +9278,267 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
 	mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
 }
 
+/* MOGCR - Monitoring Global Configuration Register
+ * ------------------------------------------------
+ */
+#define MLXSW_REG_MOGCR_ID 0x9086
+#define MLXSW_REG_MOGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN);
+
+/* reg_mogcr_ptp_iftc
+ * PTP Ingress FIFO Trap Clear
+ * The PTP_ING_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1);
+
+/* reg_mogcr_ptp_eftc
+ * PTP Egress FIFO Trap Clear
+ * The PTP_EGR_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1);
+
+/* MTPPPC - Time Precision Packet Port Configuration
+ * -------------------------------------------------
+ * This register serves for configuration of which PTP messages should be
+ * timestamped. This is a global configuration, despite the register name.
+ *
+ * Reserved when Spectrum-2.
+ */
+#define MLXSW_REG_MTPPPC_ID 0x9090
+#define MLXSW_REG_MTPPPC_LEN 0x28
+
+MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN);
+
+/* reg_mtpppc_ing_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at ingress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16);
+
+/* reg_mtpppc_egr_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at egress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr)
+{
+	MLXSW_REG_ZERO(mtpppc, payload);
+	mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing);
+	mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
+}
+
+/* MTPPTR - Time Precision Packet Timestamping Reading
+ * ---------------------------------------------------
+ * The MTPPTR is used for reading the per port PTP timestamp FIFO.
+ * There is a trap for packets which are latched to the timestamp FIFO, thus
+ * the SW knows which FIFO to read. Note that packets enter the FIFO before
+ * being trapped. The sequence number is used to synchronize the timestamp
+ * FIFO entries and the trapped packets.
+ * Reserved when Spectrum-2.
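An aside on the MTPPPC bitmaps above: the MessageType vectors follow the IEEE 1588 encoding, so enabling timestamping for a PTP profile is a matter of OR-ing bits. A sketch (bit values per the comments above; the helper name is hypothetical):

#include <linux/bits.h>
#include <linux/types.h>

/* Timestamp Sync (bit 0) and Delay_Req (bit 1) in both directions,
 * which covers the end-to-end delay mechanism.
 */
static int ptp_timestamp_e2e(struct mlxsw_core *mlxsw_core)
{
	char mtpppc_pl[MLXSW_REG_MTPPPC_LEN];
	u16 types = BIT(0) | BIT(1);

	mlxsw_reg_mtpppc_pack(mtpppc_pl, types, types);
	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpppc), mtpppc_pl);
}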
+ */ + +#define MLXSW_REG_MTPPTR_ID 0x9091 +#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4 +#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN + \ + MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN); + +/* reg_mtpptr_local_port + * Not supported for CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_mtpptr_dir { + MLXSW_REG_MTPPTR_DIR_INGRESS, + MLXSW_REG_MTPPTR_DIR_EGRESS, +}; + +/* reg_mtpptr_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1); + +/* reg_mtpptr_clr + * Clear the records. + * Access: OP + */ +MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1); + +/* reg_mtpptr_num_rec + * Number of valid records in the response + * Range 0.. cap_ptp_timestamp_fifo + * Access: RO + */ +MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4); + +/* reg_mtpptr_rec_message_type + * MessageType field as defined by IEEE 1588 Each bit corresponds to a value + * (e.g. Bit0: Sync, Bit1: Delay_Req) + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type, + MLXSW_REG_MTPPTR_BASE_LEN, 8, 4, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_domain_number + * DomainNumber field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 8, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_sequence_id + * SequenceId field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 16, + MLXSW_REG_MTPPTR_REC_LEN, 0x4, false); + +/* reg_mtpptr_rec_timestamp_high + * Timestamp of when the PTP packet has passed through the port Units of PLL + * clock time. + * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0x8, false); + +/* reg_mtpptr_rec_timestamp_low + * See rec_timestamp_high. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0xC, false); + +static inline void mlxsw_reg_mtpptr_unpack(const char *payload, + unsigned int rec, + u8 *p_message_type, + u8 *p_domain_number, + u16 *p_sequence_id, + u64 *p_timestamp) +{ + u32 timestamp_high, timestamp_low; + + *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec); + *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec); + *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec); + timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec); + timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec); + *p_timestamp = (u64)timestamp_high << 32 | timestamp_low; +} + +/* MTPTPT - Monitoring Precision Time Protocol Trap Register + * --------------------------------------------------------- + * This register is used for configuring under which trap to deliver PTP + * packets depending on type of the packet. + */ +#define MLXSW_REG_MTPTPT_ID 0x9092 +#define MLXSW_REG_MTPTPT_LEN 0x08 + +MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN); + +enum mlxsw_reg_mtptpt_trap_id { + MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + MLXSW_REG_MTPTPT_TRAP_ID_PTP1, +}; + +/* reg_mtptpt_trap_id + * Trap id. 
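An aside on the record timestamps above: since they count 6.4 ns PLL cycles on Spectrum-1 (6.4 ns = 32/5 ns), converting the value assembled by mlxsw_reg_mtpptr_unpack() to nanoseconds is a small exercise. A hypothetical helper, not part of the patch:

#include <linux/math64.h>
#include <linux/types.h>

/* Spectrum-1 FIFO timestamp cycles -> nanoseconds. Multiplying before
 * dividing keeps precision; it assumes the cycle count stays far below
 * the u64 overflow bound, which holds for these FIFO entries.
 */
static u64 mtpptr_cycles_to_ns(u64 cycles)
{
	return div_u64(cycles * 32, 5);
}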
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4);
+
+/* reg_mtptpt_message_type
+ * Bitwise vector of PTP message types to trap. This is a necessary but
+ * non-sufficient condition, since trapping must also be enabled per port.
+ * See MTPPPC.
+ * Message types are defined by IEEE 1588. Each bit corresponds to a value
+ * (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mtptptp_pack(char *payload,
+					  enum mlxsw_reg_mtptpt_trap_id trap_id,
+					  u16 message_type)
+{
+	MLXSW_REG_ZERO(mtptpt, payload);
+	mlxsw_reg_mtptpt_trap_id_set(payload, trap_id);
+	mlxsw_reg_mtptpt_message_type_set(payload, message_type);
+}
+
+/* MGPIR - Management General Peripheral Information Register
+ * ----------------------------------------------------------
+ * MGPIR register allows software to query the hardware and
+ * firmware general information of peripheral entities.
+ */
+#define MLXSW_REG_MGPIR_ID 0x9100
+#define MLXSW_REG_MGPIR_LEN 0xA0
+
+MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN);
+
+enum mlxsw_reg_mgpir_device_type {
+	MLXSW_REG_MGPIR_DEVICE_TYPE_NONE,
+	MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE,
+};
+
+/* device_type
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4);
+
+/* devices_per_flash
+ * Number of devices of device_type per flash (can be shared by a few devices).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
+
+/* num_of_devices
+ * Number of devices of device_type.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
+
+static inline void mlxsw_reg_mgpir_pack(char *payload)
+{
+	MLXSW_REG_ZERO(mgpir, payload);
+}
+
+static inline void
+mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
+		       enum mlxsw_reg_mgpir_device_type *device_type,
+		       u8 *devices_per_flash)
+{
+	if (num_of_devices)
+		*num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
+	if (device_type)
+		*device_type = mlxsw_reg_mgpir_device_type_get(payload);
+	if (devices_per_flash)
+		*devices_per_flash =
+			mlxsw_reg_mgpir_devices_per_flash_get(payload);
+}
+
 /* TNGCR - Tunneling NVE General Configuration Register
  * ----------------------------------------------------
  * The TNGCR register is used for setting up the NVE Tunneling configuration.
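An aside on MGPIR above: as a read-only register, its typical call sequence is pack, query, unpack. A sketch assuming the core's mlxsw_reg_query() helper; the gearbox-counting use case and the helper name are assumptions for illustration:

static int mlxsw_env_gearbox_count(struct mlxsw_core *mlxsw_core, u8 *p_count)
{
	enum mlxsw_reg_mgpir_device_type type;
	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
	int err;

	mlxsw_reg_mgpir_pack(mgpir_pl);
	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl);
	if (err)
		return err;

	/* Unused out-parameters may be NULL, as the unpack above allows. */
	mlxsw_reg_mgpir_unpack(mgpir_pl, p_count, &type, NULL);
	if (type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE)
		*p_count = 0;
	return 0;
}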
@@ -10006,6 +10502,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qpdsm), MLXSW_REG(qpdpm), MLXSW_REG(qtctm), + MLXSW_REG(qpsc), MLXSW_REG(pmlp), MLXSW_REG(pmtu), MLXSW_REG(ptys), @@ -10052,12 +10549,19 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mgir), MLXSW_REG(mrsr), MLXSW_REG(mlcr), + MLXSW_REG(mtpps), + MLXSW_REG(mtutc), MLXSW_REG(mpsc), MLXSW_REG(mcqi), MLXSW_REG(mcc), MLXSW_REG(mcda), MLXSW_REG(mgpc), MLXSW_REG(mprs), + MLXSW_REG(mogcr), + MLXSW_REG(mtpppc), + MLXSW_REG(mtpptr), + MLXSW_REG(mtptpt), + MLXSW_REG(mgpir), MLXSW_REG(tngcr), MLXSW_REG(tnumt), MLXSW_REG(tnqcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 23204356ad88..4d34d42b3b0e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -41,6 +41,7 @@ #include "spectrum_dpipe.h" #include "spectrum_acl_flex_actions.h" #include "spectrum_span.h" +#include "spectrum_ptp.h" #include "../mlxfw/mlxfw.h" #define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) @@ -146,6 +147,35 @@ struct mlxsw_sp_mlxfw_dev { struct mlxsw_sp *mlxsw_sp; }; +struct mlxsw_sp_ptp_ops { + struct mlxsw_sp_ptp_clock * + (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev); + void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock); + + struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp); + void (*fini)(struct mlxsw_sp_ptp_state *ptp_state); + + /* Notify a driver that a packet that might be PTP was received. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. 
+ */ + void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + void (*shaper_work)(struct work_struct *work); + int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); +}; + static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, u16 component_index, u32 *p_max_size, u8 *p_align_bits, u16 *p_max_write_size) @@ -294,6 +324,19 @@ static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); } +static void mlxsw_sp_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + + devlink_flash_update_status_notify(priv_to_devlink(mlxsw_sp->core), + msg, comp_name, + done_bytes, total_bytes); +} + static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .component_query = mlxsw_sp_component_query, .fsm_lock = mlxsw_sp_fsm_lock, @@ -303,11 +346,13 @@ static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .fsm_activate = mlxsw_sp_fsm_activate, .fsm_query_state = mlxsw_sp_fsm_query_state, .fsm_cancel = mlxsw_sp_fsm_cancel, - .fsm_release = mlxsw_sp_fsm_release + .fsm_release = mlxsw_sp_fsm_release, + .status_notify = mlxsw_sp_status_notify, }; static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = { .mlxfw_dev = { @@ -320,7 +365,10 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_core_fw_flash_start(mlxsw_sp->core); - err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); + devlink_flash_update_begin_notify(priv_to_devlink(mlxsw_sp->core)); + err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, + firmware, extack); + devlink_flash_update_end_notify(priv_to_devlink(mlxsw_sp->core)); mlxsw_core_fw_flash_end(mlxsw_sp->core); return err; @@ -374,7 +422,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) return err; } - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, NULL); release_firmware(firmware); if (err) dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n"); @@ -388,6 +436,27 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_flash_update(struct mlxsw_core *mlxsw_core, + const char *file_name, const char *component, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct firmware *firmware; + int err; + + if (component) + return -EOPNOTSUPP; + + err = request_firmware_direct(&firmware, file_name, + mlxsw_sp->bus_info->dev); + if (err) + return err; + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, extack); + release_firmware(firmware); + + return err; +} + int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index, u64 *packets, u64 *bytes) @@ -738,6 +807,8 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if 
(mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; @@ -1437,21 +1508,21 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_acl_block *acl_block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_acl_block_mlxsw_sp(acl_block); switch (f->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlxsw_sp_flower_replace(mlxsw_sp, acl_block, f); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: mlxsw_sp_flower_destroy(mlxsw_sp, acl_block, f); return 0; - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlxsw_sp_flower_stats(mlxsw_sp, acl_block, f); - case TC_CLSFLOWER_TMPLT_CREATE: + case FLOW_CLS_TMPLT_CREATE: return mlxsw_sp_flower_tmplt_create(mlxsw_sp, acl_block, f); - case TC_CLSFLOWER_TMPLT_DESTROY: + case FLOW_CLS_TMPLT_DESTROY: mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, acl_block, f); return 0; default: @@ -1514,33 +1585,45 @@ static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type, } } +static void mlxsw_sp_tc_block_flower_release(void *cb_priv) +{ + struct mlxsw_sp_acl_block *acl_block = cb_priv; + + mlxsw_sp_acl_block_destroy(acl_block); +} + +static LIST_HEAD(mlxsw_sp_block_cb_list); + static int mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port, - struct tcf_block *block, bool ingress, - struct netlink_ext_ack *extack) + struct flow_block_offload *f, bool ingress) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_acl_block *acl_block; - struct tcf_block_cb *block_cb; + struct flow_block_cb *block_cb; + bool register_block = false; int err; - block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower, - mlxsw_sp); + block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower, + mlxsw_sp); if (!block_cb) { - acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, block->net); + acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net); if (!acl_block) return -ENOMEM; - block_cb = __tcf_block_cb_register(block, - mlxsw_sp_setup_tc_block_cb_flower, - mlxsw_sp, acl_block, extack); + block_cb = flow_block_cb_alloc(f->net, + mlxsw_sp_setup_tc_block_cb_flower, + mlxsw_sp, acl_block, + mlxsw_sp_tc_block_flower_release); if (IS_ERR(block_cb)) { + mlxsw_sp_acl_block_destroy(acl_block); err = PTR_ERR(block_cb); goto err_cb_register; } + register_block = true; } else { - acl_block = tcf_block_cb_priv(block_cb); + acl_block = flow_block_cb_priv(block_cb); } - tcf_block_cb_incref(block_cb); + flow_block_cb_incref(block_cb); err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block, mlxsw_sp_port, ingress); if (err) @@ -1551,28 +1634,31 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port, else mlxsw_sp_port->eg_acl_block = acl_block; + if (register_block) { + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list); + } + return 0; err_block_bind: - if (!tcf_block_cb_decref(block_cb)) { - __tcf_block_cb_unregister(block, block_cb); + if (!flow_block_cb_decref(block_cb)) + flow_block_cb_free(block_cb); err_cb_register: - mlxsw_sp_acl_block_destroy(acl_block); - } return err; } static void mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port, - struct tcf_block *block, bool ingress) + struct flow_block_offload *f, bool ingress) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_acl_block *acl_block; - struct tcf_block_cb *block_cb; + struct 
flow_block_cb *block_cb; int err; - block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower, - mlxsw_sp); + block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower, + mlxsw_sp); if (!block_cb) return; @@ -1581,50 +1667,63 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port, else mlxsw_sp_port->eg_acl_block = NULL; - acl_block = tcf_block_cb_priv(block_cb); + acl_block = flow_block_cb_priv(block_cb); err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block, mlxsw_sp_port, ingress); - if (!err && !tcf_block_cb_decref(block_cb)) { - __tcf_block_cb_unregister(block, block_cb); - mlxsw_sp_acl_block_destroy(acl_block); + if (!err && !flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); } } static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_block_offload *f) + struct flow_block_offload *f) { + struct flow_block_cb *block_cb; tc_setup_cb_t *cb; bool ingress; int err; - if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) { + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) { cb = mlxsw_sp_setup_tc_block_cb_matchall_ig; ingress = true; - } else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) { + } else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) { cb = mlxsw_sp_setup_tc_block_cb_matchall_eg; ingress = false; } else { return -EOPNOTSUPP; } + f->driver_block_list = &mlxsw_sp_block_cb_list; + switch (f->command) { - case TC_BLOCK_BIND: - err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port, - mlxsw_sp_port, f->extack); - if (err) - return err; - err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port, - f->block, ingress, - f->extack); + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(cb, mlxsw_sp_port, + &mlxsw_sp_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(f->net, cb, mlxsw_sp_port, + mlxsw_sp_port, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port, f, + ingress); if (err) { - tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port); + flow_block_cb_free(block_cb); return err; } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list); return 0; - case TC_BLOCK_UNBIND: + case FLOW_BLOCK_UNBIND: mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port, - f->block, ingress); - tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port); + f, ingress); + block_cb = flow_block_cb_lookup(f, cb, mlxsw_sp_port); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; @@ -1745,6 +1844,65 @@ mlxsw_sp_port_get_devlink_port(struct net_device *dev) mlxsw_sp_port->local_port); } +static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return 
-EFAULT; + + return 0; +} + +static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct hwtstamp_config config = {0}; + + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); +} + +static int +mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); + case SIOCGHWTSTAMP: + return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, @@ -1760,6 +1918,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, .ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port, + .ndo_do_ioctl = mlxsw_sp_port_ioctl, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, @@ -2525,28 +2684,33 @@ mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, } } +static u32 +mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) + return mlxsw_sp1_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + static void mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { - u32 speed = SPEED_UNKNOWN; - u8 duplex = DUPLEX_UNKNOWN; - int i; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; if (!carrier_ok) - goto out; + return; - for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) { - speed = mlxsw_sp1_port_link_mode[i].speed; - duplex = DUPLEX_FULL; - break; - } - } -out: - cmd->base.speed = speed; - cmd->base.duplex = duplex; + cmd->base.speed = mlxsw_sp1_from_ptys_speed(mlxsw_sp, ptys_eth_proto); + if (cmd->base.speed != SPEED_UNKNOWN) + cmd->base.duplex = DUPLEX_FULL; } static u32 @@ -2617,6 +2781,7 @@ static const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port, .from_ptys_link = mlxsw_sp1_from_ptys_link, + .from_ptys_speed = mlxsw_sp1_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp1_to_ptys_speed, @@ -2867,28 +3032,33 @@ mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, } } +static u32 +mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + return mlxsw_sp2_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + static void mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { - u32 speed = SPEED_UNKNOWN; - u8 duplex = DUPLEX_UNKNOWN; - int i; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; if (!carrier_ok) - goto out; + return; - for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { - speed = mlxsw_sp2_port_link_mode[i].speed; - duplex = DUPLEX_FULL; - break; - } - } -out: - 
cmd->base.speed = speed; - cmd->base.duplex = duplex; + cmd->base.speed = mlxsw_sp2_from_ptys_speed(mlxsw_sp, ptys_eth_proto); + if (cmd->base.speed != SPEED_UNKNOWN) + cmd->base.duplex = DUPLEX_FULL; } static bool @@ -2999,6 +3169,7 @@ static const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port, .from_ptys_link = mlxsw_sp2_from_ptys_link, + .from_ptys_speed = mlxsw_sp2_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp2_to_ptys_speed, @@ -3159,31 +3330,6 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, return 0; } -static int mlxsw_sp_flash_device(struct net_device *dev, - struct ethtool_flash *flash) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - const struct firmware *firmware; - int err; - - if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) - return -EOPNOTSUPP; - - dev_hold(dev); - rtnl_unlock(); - - err = request_firmware_direct(&firmware, flash->data, &dev->dev); - if (err) - goto out; - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); - release_firmware(firmware); -out: - rtnl_lock(); - dev_put(dev); - return err; -} - static int mlxsw_sp_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { @@ -3213,6 +3359,15 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, return err; } +static int +mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info); +} + static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, @@ -3224,9 +3379,9 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_sset_count = mlxsw_sp_port_get_sset_count, .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, - .flash_device = mlxsw_sp_flash_device, .get_module_info = mlxsw_sp_get_module_info, .get_module_eeprom = mlxsw_sp_get_module_eeprom, + .get_ts_info = mlxsw_sp_get_ts_info, }; static int @@ -3343,8 +3498,9 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } - /* Make sure the max shaper is disabled in all hierarchies that - * support it. + /* Make sure the max shaper is disabled in all hierarchies that support + * it. Note that this disables ptps (PTP shaper), but that is intended + * for the initial configuration. 
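An aside on the .get_ts_info wiring above: a callback for a driver with a registered PHC typically reports along the lines below. This is an illustrative sketch only; the actual mlxsw_sp1/sp2 callbacks live in spectrum_ptp.c and may differ:

#include <linux/ethtool.h>
#include <linux/net_tstamp.h>
#include <linux/ptp_clock_kernel.h>

static int example_get_ts_info(struct ptp_clock *ptp_clock,
			       struct ethtool_ts_info *info)
{
	/* Tie the netdev to its PHC so userspace can find /dev/ptpN. */
	info->phc_index = ptp_clock_index(ptp_clock);
	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
				SOF_TIMESTAMPING_RX_HARDWARE |
				SOF_TIMESTAMPING_RAW_HARDWARE;
	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
			   BIT(HWTSTAMP_FILTER_ALL);
	return 0;
}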
*/ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, @@ -3589,6 +3745,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, } mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan; + INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, + mlxsw_sp->ptp_ops->shaper_work); + mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { @@ -3643,6 +3802,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); + mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -3927,14 +4088,55 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, if (status == MLXSW_PORT_OPER_STATUS_UP) { netdev_info(mlxsw_sp_port->dev, "link up\n"); netif_carrier_on(mlxsw_sp_port->dev); + mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); } } -static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, - u8 local_port, void *priv) +static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp, + char *mtpptr_pl, bool ingress) +{ + u8 local_port; + u8 num_rec; + int i; + + local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl); + num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl); + for (i = 0; i < num_rec; i++) { + u8 domain_number; + u8 message_type; + u16 sequence_id; + u64 timestamp; + + mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type, + &domain_number, &sequence_id, + ×tamp); + mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port, + message_type, domain_number, + sequence_id, timestamp); + } +} + +static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true); +} + +static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false); +} + +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) { struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; @@ -4008,6 +4210,14 @@ out: consume_skb(skb); } +static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port); +} + #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) @@ -4029,7 +4239,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* L2 traps */ MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true), MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true), - MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true), + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU, + false, SP_LLDP, DISCARD), MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false), MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false), MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false), @@ -4098,6 
+4309,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false), + /* PTP traps */ + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU, + false, SP_PTP0, DISCARD), + MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false), +}; + +static const struct mlxsw_listener mlxsw_sp1_listener[] = { + /* Events */ + MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0), + MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -4149,6 +4370,14 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) rate = 1024; burst_size = 7; break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: + rate = 24 * 1024; + burst_size = 12; + break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: + rate = 19 * 1024; + burst_size = 12; + break; default: continue; } @@ -4187,6 +4416,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: priority = 5; tc = 5; break; @@ -4204,6 +4434,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: priority = 2; tc = 2; break; @@ -4237,22 +4468,16 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) return 0; } -static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; int err; - err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); - if (err) - return err; - - err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { err = mlxsw_core_trap_register(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); if (err) goto err_listener_register; @@ -4263,23 +4488,63 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) err_listener_register: for (i--; i >= 0; i--) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } return err; } -static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } } +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + if (err) + goto err_extra_traps_init; + + return 0; + +err_extra_traps_init: + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + return err; +} + +static void 
mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); +} + #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) @@ -4332,6 +4597,32 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { + .clock_init = mlxsw_sp1_ptp_clock_init, + .clock_fini = mlxsw_sp1_ptp_clock_fini, + .init = mlxsw_sp1_ptp_init, + .fini = mlxsw_sp1_ptp_fini, + .receive = mlxsw_sp1_ptp_receive, + .transmitted = mlxsw_sp1_ptp_transmitted, + .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp1_ptp_shaper_work, + .get_ts_info = mlxsw_sp1_ptp_get_ts_info, +}; + +static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { + .clock_init = mlxsw_sp2_ptp_clock_init, + .clock_fini = mlxsw_sp2_ptp_clock_fini, + .init = mlxsw_sp2_ptp_init, + .fini = mlxsw_sp2_ptp_fini, + .receive = mlxsw_sp2_ptp_receive, + .transmitted = mlxsw_sp2_ptp_transmitted, + .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp2_ptp_shaper_work, + .get_ts_info = mlxsw_sp2_ptp_get_ts_info, +}; + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); @@ -4429,6 +4720,28 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_router_init; } + if (mlxsw_sp->bus_info->read_frc_capable) { + /* NULL is a valid return value from clock_init */ + mlxsw_sp->clock = + mlxsw_sp->ptp_ops->clock_init(mlxsw_sp, + mlxsw_sp->bus_info->dev); + if (IS_ERR(mlxsw_sp->clock)) { + err = PTR_ERR(mlxsw_sp->clock); + dev_err(mlxsw_sp->bus_info->dev, "Failed to init ptp clock\n"); + goto err_ptp_clock_init; + } + } + + if (mlxsw_sp->clock) { + /* NULL is a valid return value from ptp_ops->init */ + mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp); + if (IS_ERR(mlxsw_sp->ptp_state)) { + err = PTR_ERR(mlxsw_sp->ptp_state); + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n"); + goto err_ptp_init; + } + } + /* Initialize netdevice notifier after router and SPAN is initialized, * so that the event handler can use router structures and call SPAN * respin. 
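The split into mlxsw_sp_traps_register()/mlxsw_sp_traps_unregister() above, with the common and per-ASIC listener arrays, follows the usual register-in-order, unwind-in-reverse idiom. In miniature (types and callbacks are hypothetical stand-ins, shown only for the shape of the error path):

#include <stddef.h>

struct item { int id; };

/* Stand-ins for mlxsw_core_trap_register()/unregister(). */
static int register_one(const struct item *item) { (void)item; return 0; }
static void unregister_one(const struct item *item) { (void)item; }

static int register_all(const struct item *items, size_t count)
{
	size_t i;
	int err;

	for (i = 0; i < count; i++) {
		err = register_one(&items[i]);
		if (err)
			goto err_unwind;
	}
	return 0;

err_unwind:
	while (i--)	/* roll back already-registered entries, newest first */
		unregister_one(&items[i]);
	return err;
}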
@@ -4459,6 +4772,12 @@ err_ports_create: err_dpipe_init: unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); err_netdev_notifier: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); +err_ptp_init: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); +err_ptp_clock_init: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: mlxsw_sp_acl_fini(mlxsw_sp); @@ -4502,6 +4821,9 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; + mlxsw_sp->listeners = mlxsw_sp1_listener; + mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4521,6 +4843,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4532,6 +4855,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + if (mlxsw_sp->clock) { + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); + } mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_nve_fini(mlxsw_sp); @@ -4874,6 +5201,15 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) mlxsw_sp_params_unregister(mlxsw_core); } +static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + skb_pull(skb, MLXSW_TXHDR_LEN); + mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port); +} + static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), @@ -4892,11 +5228,13 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, .params_register = mlxsw_sp_params_register, .params_unregister = mlxsw_sp_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .res_query_enabled = true, @@ -4920,10 +5258,12 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 8601b3041acd..a252b080dda9 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -136,6 +136,8 @@ struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; +struct mlxsw_sp_ptp_state; +struct mlxsw_sp_ptp_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -155,6 +157,8 @@ struct mlxsw_sp { struct mlxsw_sp_kvdl *kvdl; struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; + struct mlxsw_sp_ptp_clock *clock; + struct mlxsw_sp_ptp_state *ptp_state; struct mlxsw_sp_counter_pool *counter_pool; struct { @@ -172,6 +176,9 @@ struct mlxsw_sp { const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + const struct mlxsw_sp_ptp_ops *ptp_ops; + const struct mlxsw_listener *listeners; + size_t listeners_count; }; static inline struct mlxsw_sp_upper * @@ -259,6 +266,12 @@ struct mlxsw_sp_port { unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; + struct { + struct delayed_work shaper_dw; + struct hwtstamp_config hwtstamp_config; + u16 ing_types; + u16 egr_types; + } ptp; }; struct mlxsw_sp_port_type_speed_ops { @@ -267,6 +280,7 @@ struct mlxsw_sp_port_type_speed_ops { struct ethtool_link_ksettings *cmd); void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, unsigned long *mode); + u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); @@ -435,6 +449,8 @@ struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -620,6 +636,15 @@ enum mlxsw_sp_acl_profile { MLXSW_SP_ACL_PROFILE_MR, }; +struct mlxsw_sp_acl_block { + struct list_head binding_list; + struct mlxsw_sp_acl_ruleset *ruleset_zero; + struct mlxsw_sp *mlxsw_sp; + unsigned int rule_count; + unsigned int disable_count; + struct net *net; +}; + struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block); unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block); @@ -782,19 +807,19 @@ extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops; /* spectrum_flower.c */ int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); /* spectrum_qdisc.c */ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port); diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index a146a44634e9..e8ac90564dbe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -45,14 +45,6 @@ struct mlxsw_sp_acl_block_binding { bool ingress; }; -struct mlxsw_sp_acl_block { - struct list_head binding_list; - struct mlxsw_sp_acl_ruleset *ruleset_zero; - struct mlxsw_sp *mlxsw_sp; - unsigned int rule_count; - unsigned int disable_count; -}; - struct mlxsw_sp_acl_ruleset_ht_key { struct mlxsw_sp_acl_block *block; u32 chain_index; @@ -221,6 +213,7 @@ struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp, return NULL; INIT_LIST_HEAD(&block->binding_list); block->mlxsw_sp = mlxsw_sp; + block->net = net; return block; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 2a998dea4f39..279c241f76f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -12,7 +12,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4), MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { @@ -20,7 +20,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4), MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { @@ -32,13 +32,13 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = { MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { @@ -149,7 +149,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_4[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = { MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x04, 0, 8), /* RX_ACL_SYSTEM_PORT */ + MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 8, -1, true), /* RX_ACL_SYSTEM_PORT */ }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 96b23c856f4d..202e9a246019 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -120,8 +120,51 @@ static int 
mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + struct mlxsw_sp_acl_block *block) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct mlxsw_sp_port *mlxsw_sp_port; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); + return -EINVAL; + } + + ingress_dev = __dev_get_by_index(block->net, + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on"); + return -EINVAL; + } + + if (!mlxsw_sp_port_dev_check(ingress_dev)) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port"); + return -EINVAL; + } + + mlxsw_sp_port = netdev_priv(ingress_dev); + if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device"); + return -EINVAL; + } + + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + mlxsw_sp_port->local_port, + 0xFFFFFFFF); + return 0; +} + static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct flow_match_ipv4_addrs match; @@ -136,7 +179,7 @@ static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, } static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct flow_match_ipv6_addrs match; @@ -170,10 +213,10 @@ static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u8 ip_proto) { - const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_match_ports match; if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) @@ -197,10 +240,10 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u8 ip_proto) { - const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_match_tcp match; if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) @@ -222,10 +265,10 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 n_proto) { - const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_match_ip match; if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) @@ -256,9 +299,9 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f) + struct 
flow_cls_offload *f) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0; u16 n_proto_key = 0; @@ -267,7 +310,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, int err; if (dissector->used_keys & - ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + ~(BIT(FLOW_DISSECTOR_KEY_META) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | @@ -283,6 +327,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); + err = mlxsw_sp_flower_parse_meta(rulei, f, block); + if (err) + return err; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; @@ -378,7 +426,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_rule_info *rulei; struct mlxsw_sp_acl_ruleset *ruleset; @@ -425,7 +473,7 @@ err_rule_create: void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule *rule; @@ -447,7 +495,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule *rule; @@ -483,7 +531,7 @@ err_rule_get_stats: int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule_info rulei; @@ -504,7 +552,7 @@ int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c new file mode 100644 index 000000000000..bd9c2bc2d5d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -0,0 +1,1111 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/ptp_clock_kernel.h> +#include <linux/clocksource.h> +#include <linux/timecounter.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/rhashtable.h> +#include <linux/ptp_classify.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/net_tstamp.h> + +#include "spectrum.h" +#include "spectrum_ptp.h" +#include "core.h" + +#define MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT 29 +#define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ 156257 /* 6.4nSec */ +#define MLXSW_SP1_PTP_CLOCK_MASK 64 + +#define MLXSW_SP1_PTP_HT_GC_INTERVAL 500 /* ms */ + +/* How long, approximately, should the unmatched entries stay in the hash table + * before they are collected. Should be evenly divisible by the GC interval. 
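An aside on the clock constants above: the cyclecounter turns free-running-counter deltas into nanoseconds as ns = (cycles * mult) >> shift. With shift = 29 and a 156257 kHz clock, clocksource_khz2mult() picks mult so that mult / 2^29 is roughly 6.4 ns per cycle. A hypothetical helper making the conversion explicit:

#include <linux/types.h>

/* Convert an FRC cycle delta to nanoseconds using the cyclecounter's
 * mult/shift pair. Accurate only while cycles * mult fits in 64 bits,
 * which is why the driver wraps this in a timecounter and periodically
 * reads it from the overflow work item below.
 */
static u64 frc_delta_to_ns(u64 cycles, u32 mult, u32 shift)
{
	return (cycles * mult) >> shift;
}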
+ */ +#define MLXSW_SP1_PTP_HT_GC_TIMEOUT 1000 /* ms */ + +struct mlxsw_sp_ptp_state { + struct mlxsw_sp *mlxsw_sp; + struct rhashtable unmatched_ht; + spinlock_t unmatched_lock; /* protects the HT */ + struct delayed_work ht_gc_dw; + u32 gc_cycle; +}; + +struct mlxsw_sp1_ptp_key { + u8 local_port; + u8 message_type; + u16 sequence_id; + u8 domain_number; + bool ingress; +}; + +struct mlxsw_sp1_ptp_unmatched { + struct mlxsw_sp1_ptp_key key; + struct rhash_head ht_node; + struct rcu_head rcu; + struct sk_buff *skb; + u64 timestamp; + u32 gc_cycle; +}; + +static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key), + .key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key), + .head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node), +}; + +struct mlxsw_sp_ptp_clock { + struct mlxsw_core *core; + spinlock_t lock; /* protect this structure */ + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + unsigned long overflow_period; + struct delayed_work overflow_work; +}; + +static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp_ptp_clock *clock, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u32 frc_h1, frc_h2, frc_l; + + frc_h1 = mlxsw_core_read_frc_h(mlxsw_core); + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + frc_h2 = mlxsw_core_read_frc_h(mlxsw_core); + + if (frc_h1 != frc_h2) { + /* wrap around */ + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + } + + return (u64) frc_l | (u64) frc_h2 << 32; +} + +static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(cc, struct mlxsw_sp_ptp_clock, cycles); + + return __mlxsw_sp1_ptp_read_frc(clock, NULL) & cc->mask; +} + +static int +mlxsw_sp1_ptp_phc_adjfreq(struct mlxsw_sp_ptp_clock *clock, int freq_adj) +{ + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + + mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ, + freq_adj, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static u64 mlxsw_sp1_ptp_ns2cycles(const struct timecounter *tc, u64 nsec) +{ + u64 cycles = (u64) nsec; + + cycles <<= tc->cc->shift; + cycles = div_u64(cycles, tc->cc->mult); + + return cycles; +} + +static int +mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u64 next_sec, next_sec_in_nsec, cycles; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + char mtpps_pl[MLXSW_REG_MTPPS_LEN]; + int err; + + next_sec = div_u64(nsec, NSEC_PER_SEC) + 1; + next_sec_in_nsec = next_sec * NSEC_PER_SEC; + + spin_lock_bh(&clock->lock); + cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec); + spin_unlock_bh(&clock->lock); + + mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl); + if (err) + return err; + + mlxsw_reg_mtutc_pack(mtutc_pl, + MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC, + 0, next_sec); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + int neg_adj = 0; + u32 diff; + u64 adj; + s32 ppb; + + ppb = 
scaled_ppm_to_ppb(scaled_ppm); + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + + adj = clock->nominal_c_mult; + adj *= ppb; + diff = div_u64(adj, NSEC_PER_SEC); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? -ppb : ppb); +} + +static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec; + + spin_lock_bh(&clock->lock); + timecounter_adjtime(&clock->tc, delta); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 cycles, nsec; + + spin_lock_bh(&clock->lock); + cycles = __mlxsw_sp1_ptp_read_frc(clock, sts); + nsec = timecounter_cyc2time(&clock->tc, cycles); + spin_unlock_bh(&clock->lock); + + *ts = ns_to_timespec64(nsec); + + return 0; +} + +static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec = timespec64_to_ns(ts); + + spin_lock_bh(&clock->lock); + timecounter_init(&clock->tc, &clock->cycles, nsec); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static const struct ptp_clock_info mlxsw_sp1_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlxsw_sp_clock", + .max_adj = 100000000, + .adjfine = mlxsw_sp1_ptp_adjfine, + .adjtime = mlxsw_sp1_ptp_adjtime, + .gettimex64 = mlxsw_sp1_ptp_gettimex, + .settime64 = mlxsw_sp1_ptp_settime, +}; + +static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_ptp_clock *clock; + + clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period); +} + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + u64 overflow_cycles, nsec, frac = 0; + struct mlxsw_sp_ptp_clock *clock; + int err; + + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&clock->lock); + clock->cycles.read = mlxsw_sp1_ptp_read_frc; + clock->cycles.shift = MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(MLXSW_SP1_PTP_CLOCK_FREQ_KHZ, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(MLXSW_SP1_PTP_CLOCK_MASK); + clock->core = mlxsw_sp->core; + + timecounter_init(&clock->tc, &clock->cycles, + ktime_to_ns(ktime_get_real())); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least twice every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. 
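+ * That is, overflow_cycles = min((2^63 - 1) / mult, mask / 3), which is + * converted below first to nanoseconds and then to jiffies.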
+ */ + overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); + + nsec = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, 0, &frac); + clock->overflow_period = nsecs_to_jiffies(nsec); + + INIT_DELAYED_WORK(&clock->overflow_work, mlxsw_sp1_ptp_clock_overflow); + mlxsw_core_schedule_dw(&clock->overflow_work, 0); + + clock->ptp_info = mlxsw_sp1_ptp_clock_info; + clock->ptp = ptp_clock_register(&clock->ptp_info, dev); + if (IS_ERR(clock->ptp)) { + err = PTR_ERR(clock->ptp); + dev_err(dev, "ptp_clock_register failed %d\n", err); + goto err_ptp_clock_register; + } + + return clock; + +err_ptp_clock_register: + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ + ptp_clock_unregister(clock->ptp); + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); +} + +static int mlxsw_sp_ptp_parse(struct sk_buff *skb, + u8 *p_domain_number, + u8 *p_message_type, + u16 *p_sequence_id) +{ + unsigned int offset = 0; + unsigned int ptp_class; + u8 *data; + + data = skb_mac_header(skb); + ptp_class = ptp_classify_raw(skb); + + switch (ptp_class & PTP_CLASS_VMASK) { + case PTP_CLASS_V1: + case PTP_CLASS_V2: + break; + default: + return -ERANGE; + } + + if (ptp_class & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (ptp_class & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; + break; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; + break; + case PTP_CLASS_L2: + offset += ETH_HLEN; + break; + default: + return -ERANGE; + } + + /* PTP header is 34 bytes. */ + if (skb->len < offset + 34) + return -EINVAL; + + *p_message_type = data[offset] & 0x0f; + *p_domain_number = data[offset + 4]; + *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31]; + return 0; +} + +/* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on + * error. + */ +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL; + struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state; + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp1_ptp_unmatched *conflict; + + unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC); + if (!unmatched) + return ERR_PTR(-ENOMEM); + + unmatched->key = key; + unmatched->skb = skb; + unmatched->timestamp = timestamp; + unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles; + + conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + if (conflict) + kfree(unmatched); + + return conflict; +} + +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key) +{ + return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +static int +mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +/* This function is called in the following scenarios: + * + * 1) When a packet is matched with its timestamp. 
+ * 2) In several situations when it is necessary to immediately pass on + * an SKB without a timestamp. + * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish(). + * This case is similar to 2) above. + */ +static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + /* Between capturing the packet and finishing it, there is a window of + * opportunity for the originating port to go away (e.g. due to a + * split). Also make sure the SKB device reference is still valid. + */ + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) { + dev_kfree_skb_any(skb); + return; + } + + if (ingress) { + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); + } else { + /* skb_tstamp_tx() allows hwtstamps to be NULL. */ + skb_tstamp_tx(skb, hwtstamps); + dev_kfree_skb_any(skb); + } +} + +static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps hwtstamps; + u64 nsec; + + spin_lock_bh(&mlxsw_sp->clock->lock); + nsec = timecounter_cyc2time(&mlxsw_sp->clock->tc, timestamp); + spin_unlock_bh(&mlxsw_sp->clock->lock); + + hwtstamps.hwtstamp = ns_to_ktime(nsec); + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, key.ingress, &hwtstamps); +} + +static void +mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + if (unmatched->skb && unmatched->timestamp) + mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key, + unmatched->skb, + unmatched->timestamp); + else if (unmatched->skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb, + unmatched->key.local_port, + unmatched->key.ingress, NULL); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched = ptr; + + /* This is invoked at a point where the ports are gone already. Nothing + * to do with whatever is left in the HT but to free it. + */ + if (unmatched->skb) + dev_kfree_skb_any(unmatched->skb); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, u64 timestamp) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict; + int err; + + rcu_read_lock(); + + unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key); + + spin_lock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) { + /* There was an unmatched entry when we looked, but it may have + * been removed before we took the lock. + */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); + if (err) + unmatched = NULL; + } + + if (!unmatched) { + /* We have no unmatched entry, but one may have been added after + * we looked and before we took the lock. + */ + unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(unmatched)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + unmatched = NULL; + } else if (unmatched) { + /* Save just told us, under lock, that the entry is + * there, so this has to work. 
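+ * (A non-NULL, non-error return from the save above is the conflicting + * entry already present in the hash table, so the removal below must + * succeed.)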
+ */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, + unmatched); + WARN_ON_ONCE(err); + } + } + + /* If unmatched is non-NULL here, it comes either from the lookup, or + * from the save attempt above. In either case the entry was removed + * from the hash table. If unmatched is NULL, a new unmatched entry was + * added to the hash table, and there was no conflict. + */ + + if (skb && unmatched && unmatched->timestamp) { + unmatched->skb = skb; + } else if (timestamp && unmatched && unmatched->skb) { + unmatched->timestamp = timestamp; + } else if (unmatched) { + /* unmatched holds an older entry of the same type: either an + * skb if we are handling skb, or a timestamp if we are handling + * timestamp. We can't match that up, so save what we have. + */ + conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(conflict)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + } else { + /* Above, we removed an object with this key from the + * hash table, under lock, so conflict can not be a + * valid pointer. + */ + WARN_ON_ONCE(conflict); + } + } + + spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) + mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched); + + rcu_read_unlock(); +} + +static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + int err; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto immediate; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + if (!types) + goto immediate; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.ingress = ingress; + + err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type, + &key.sequence_id); + if (err) + goto immediate; + + /* For packets whose timestamping was not enabled on this port, don't + * bother trying to match the timestamp. + */ + if (!((1 << key.message_type) & types)) + goto immediate; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0); + return; + +immediate: + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL); +} + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + return; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + + /* For message types whose timestamping was not enabled on this port, + * don't bother with the timestamp. 
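+ * (types is a bitmask in which bit N corresponds to PTP messageType N.)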
+ */ + if (!((1 << message_type) & types)) + return; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.domain_number = domain_number; + key.message_type = message_type; + key.sequence_id = sequence_id; + key.ingress = ingress; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp); +} + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port) +{ + skb_reset_mac_header(skb); + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true); +} + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false); +} + +static void +mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + int err; + + /* If an unmatched entry has an SKB, it has to be handed over to the + * networking stack. This is usually done from a trap handler, which is + * invoked in a softirq context. Here we are going to do it in process + * context. If that were to be interrupted by a softirq, it could cause + * a deadlock when an attempt is made to take an already-taken lock + * somewhere along the sending path. Disable softirqs to prevent this. + */ + local_bh_disable(); + + spin_lock(&ptp_state->unmatched_lock); + err = rhashtable_remove_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + spin_unlock(&ptp_state->unmatched_lock); + + if (err) + /* The packet was matched with timestamp during the walk. */ + goto out; + + /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). While + * the comment at that function states that it can only be called in + * soft IRQ context, this pattern of local_bh_disable() + + * netif_receive_skb(), in process context, is seen elsewhere in the + * kernel, notably in pktgen. 
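+ * The local_bh_disable() at the top of this function provides that + * softirq-disabled context here.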
+ */ + mlxsw_sp1_ptp_unmatched_finish(ptp_state->mlxsw_sp, unmatched); + +out: + local_bh_enable(); +} + +static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp_ptp_state *ptp_state; + struct rhashtable_iter iter; + u32 gc_cycle; + void *obj; + + ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw); + gc_cycle = ptp_state->gc_cycle++; + + rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter); + rhashtable_walk_start(&iter); + while ((obj = rhashtable_walk_next(&iter))) { + if (IS_ERR(obj)) + continue; + + unmatched = obj; + if (unmatched->gc_cycle <= gc_cycle) + mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); +} + +static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + char mtptpt_pl[MLXSW_REG_MTPTPT_LEN]; + + mlxsw_reg_mtptptp_pack(mtptpt_pl, trap_id, message_type); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl); +} + +static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp, + bool clr) +{ + char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); + if (err) + return err; + + mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr); + mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); +} + +static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp, + u16 ing_types, u16 egr_types) +{ + char mtpppc_pl[MLXSW_REG_MTPPPC_LEN]; + + mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl); +} + +struct mlxsw_sp1_ptp_shaper_params { + u32 ethtool_speed; + enum mlxsw_reg_qpsc_port_speed port_speed; + u8 shaper_time_exp; + u8 shaper_time_mantissa; + u8 shaper_inc; + u8 shaper_bs; + u8 port_to_shaper_credits; + int ing_timestamp_inc; + int egr_timestamp_inc; +}; + +static const struct mlxsw_sp1_ptp_shaper_params +mlxsw_sp1_ptp_shaper_params[] = { + { + .ethtool_speed = SPEED_100, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_100M, + .shaper_time_exp = 4, + .shaper_time_mantissa = 12, + .shaper_inc = 9, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -313, + .egr_timestamp_inc = 313, + }, + { + .ethtool_speed = SPEED_1000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_1G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 12, + .shaper_inc = 6, + .shaper_bs = 0, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -35, + .egr_timestamp_inc = 35, + }, + { + .ethtool_speed = SPEED_10000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_10G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 2, + .shaper_inc = 14, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -11, + .egr_timestamp_inc = 11, + }, + { + .ethtool_speed = SPEED_25000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_25G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 0, + .shaper_inc = 11, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -14, + .egr_timestamp_inc = 14, + }, +}; + +#define MLXSW_SP1_PTP_SHAPER_PARAMS_LEN ARRAY_SIZE(mlxsw_sp1_ptp_shaper_params) + +static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp1_ptp_shaper_params *params; + char 
qpsc_pl[MLXSW_REG_QPSC_LEN]; + int i, err; + + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + params = &mlxsw_sp1_ptp_shaper_params[i]; + mlxsw_reg_qpsc_pack(qpsc_pl, params->port_speed, + params->shaper_time_exp, + params->shaper_time_mantissa, + params->shaper_inc, params->shaper_bs, + params->port_to_shaper_credits, + params->ing_timestamp_inc, + params->egr_timestamp_inc); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpsc), qpsc_pl); + if (err) + return err; + } + + return 0; +} + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_ptp_state *ptp_state; + u16 message_type; + int err; + + err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp); + if (err) + return ERR_PTR(err); + + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); + if (!ptp_state) + return ERR_PTR(-ENOMEM); + ptp_state->mlxsw_sp = mlxsw_sp; + + spin_lock_init(&ptp_state->unmatched_lock); + + err = rhashtable_init(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_ht_params); + if (err) + goto err_hashtable_init; + + /* Deliver these message types as PTP0. */ + message_type = BIT(MLXSW_SP_PTP_MESSAGE_TYPE_SYNC) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP); + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + message_type); + if (err) + goto err_mtptpt_set; + + /* Everything else is PTP1. */ + message_type = ~message_type; + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, + message_type); + if (err) + goto err_mtptpt1_set; + + err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true); + if (err) + goto err_fifo_clr; + + INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc); + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); + return ptp_state; + +err_fifo_clr: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); +err_mtptpt1_set: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); +err_mtptpt_set: + rhashtable_destroy(&ptp_state->unmatched_ht); +err_hashtable_init: + kfree(ptp_state); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp; + + cancel_delayed_work_sync(&ptp_state->ht_gc_dw); + mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0); + mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); + rhashtable_free_and_destroy(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_free_fn, NULL); + kfree(ptp_state); +} + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + *config = mlxsw_sp_port->ptp.hwtstamp_config; + return 0; +} + +static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, + u16 *p_ing_types, u16 *p_egr_types, + enum hwtstamp_rx_filters *p_rx_filter) +{ + enum hwtstamp_rx_filters rx_filter = config->rx_filter; + enum hwtstamp_tx_types tx_type = config->tx_type; + u16 ing_types = 0x00; + u16 egr_types = 0x00; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + egr_types = 0x00; + break; + case HWTSTAMP_TX_ON: + egr_types = 0xff; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + return -ERANGE; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + ing_types = 0x00; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + 
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + ing_types = 0x01; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + ing_types = 0x02; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + ing_types = 0x0f; + break; + case HWTSTAMP_FILTER_ALL: + ing_types = 0xff; + break; + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -ERANGE; + } + + *p_ing_types = ing_types; + *p_egr_types = egr_types; + *p_rx_filter = rx_filter; + return 0; +} + +static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ing_types, u16 egr_types) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port *tmp; + int i; + + /* MTPPPC configures timestamping globally, not per port. Find the + * configuration that contains all configured timestamping requests. + */ + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + tmp = mlxsw_sp->ports[i]; + if (tmp && tmp != mlxsw_sp_port) { + ing_types |= tmp->ptp.ing_types; + egr_types |= tmp->ptp.egr_types; + } + } + + return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp, + ing_types, egr_types); +} + +static bool mlxsw_sp1_ptp_hwtstamp_enabled(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->ptp.ing_types || mlxsw_sp_port->ptp.egr_types; +} + +static int +mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_ptps_pack(qeec_pl, mlxsw_sp_port->local_port, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port) +{ + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_oper, speed; + bool ptps = false; + int err, i; + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false); + + port_type_speed_ops = mlxsw_sp->port_type_speed_ops; + port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, + mlxsw_sp_port->local_port, 0, + false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, + &eth_proto_oper); + + speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) { + ptps = true; + break; + } + } + + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, ptps); +} + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port, + ptp.shaper_dw); + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + netdev_err(mlxsw_sp_port->dev, "Failed to set up PTP shaper\n"); +} + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + enum hwtstamp_rx_filters rx_filter; + u16 ing_types; + 
u16 egr_types; + int err; + + err = mlxsw_sp_ptp_get_message_types(config, &ing_types, &egr_types, + &rx_filter); + if (err) + return err; + + err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types); + if (err) + return err; + + mlxsw_sp_port->ptp.hwtstamp_config = *config; + mlxsw_sp_port->ptp.ing_types = ing_types; + mlxsw_sp_port->ptp.egr_types = egr_types; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + return err; + + /* Notify the ioctl caller what we are actually timestamping. */ + config->rx_filter = rx_filter; + + return 0; +} + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h new file mode 100644 index 000000000000..72e55f6926b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_PTP_H +#define _MLXSW_SPECTRUM_PTP_H + +#include <linux/device.h> +#include <linux/rhashtable.h> + +struct mlxsw_sp; +struct mlxsw_sp_port; +struct mlxsw_sp_ptp_clock; + +enum { + MLXSW_SP_PTP_MESSAGE_TYPE_SYNC, + MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP, +}; + +static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + return 0; +} + +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock); + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp); + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state); + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port); + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp); + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + +#else + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp1_ptp_receive(struct 
mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline void +mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, + u16 sequence_id, u64 timestamp) +{ +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +#endif + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ef554739dd54..e618be7ce6c6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -21,6 +21,7 @@ #include <net/arp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> +#include <net/nexthop.h> #include <net/fib_rules.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> @@ -2887,7 +2888,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, return false; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; struct in6_addr *gw; int ifindex, weight; @@ -2959,7 +2960,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev; + dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev; val ^= dev->ifindex; } @@ -3883,23 +3884,25 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } static bool mlxsw_sp_fi_is_gateway(const struct 
mlxsw_sp *mlxsw_sp, - const struct fib_info *fi) + struct fib_info *fi) { - return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK || - mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); + const struct fib_nh *nh = fib_info_nh(fi, 0); + + return nh->fib_nh_scope == RT_SCOPE_LINK || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } static struct mlxsw_sp_nexthop_group * mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { + unsigned int nhs = fib_info_num_path(fi); struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; struct fib_nh *fib_nh; int i; int err; - nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs), - GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); nh_grp->priv = fi; @@ -3907,11 +3910,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) nh_grp->neigh_tbl = &arp_tbl; nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); - nh_grp->count = fi->fib_nhs; + nh_grp->count = nhs; fib_info_hold(fi); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - fib_nh = &fi->fib_nh[i]; + fib_nh = fib_info_nh(fi, i); err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) goto err_nexthop4_init; @@ -4027,9 +4030,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->fib6_nh.fib_nh_gw6)) + &rt->fib6_nh->fib_nh_gw6)) return nh; continue; } @@ -4089,13 +4092,13 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; return; } list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; struct mlxsw_sp_nexthop *nh; nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); @@ -4117,7 +4120,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4349,9 +4352,9 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); - struct net_device *dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4995,7 +4998,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ - return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family; + return !(rt->fib6_flags & RTF_ADDRCONF) && + rt->fib6_nh->fib_nh_gw_family; } static struct 
fib6_info * @@ -5054,8 +5058,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->fib6_nh.fib_nh_dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret); + return rt->fib6_nh->fib_nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, @@ -5065,7 +5069,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh.fib_nh_dev; + struct net_device *dev = rt->fib6_nh->fib_nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -5108,11 +5112,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.fib_nh_dev; + struct net_device *dev = rt->fib6_nh->fib_nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->fib6_nh.fib_nh_weight; - memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh->fib_nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -5135,7 +5139,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { - return rt->fib6_nh.fib_nh_gw_family || + return rt->fib6_nh->fib_nh_gw_family || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } @@ -5274,17 +5278,21 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) - return PTR_ERR(mlxsw_sp_rt6); + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6++; + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); if (err) @@ -5293,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, return 0; err_nexthop6_group_update: - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; +err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } return err; } static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int i; - mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); - if (WARN_ON(!mlxsw_sp_rt6)) - return; + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, + rt_arr[i]); + if (WARN_ON_ONCE(!mlxsw_sp_rt6)) + continue; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } - fib6_entry->nrt6--; 
- list_del(&mlxsw_sp_rt6->list); mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, @@ -5354,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); if (!fib6_entry) return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) { - err = PTR_ERR(mlxsw_sp_rt6); - goto err_rt6_create; + INIT_LIST_HEAD(&fib6_entry->rt6_list); + + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; } - mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); - INIT_LIST_HEAD(&fib6_entry->rt6_list); - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6 = 1; err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) goto err_nexthop6_group_get; @@ -5386,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, return fib6_entry; err_nexthop6_group_get: - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } kfree(fib6_entry); return ERR_PTR(err); } @@ -5431,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, static int mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool replace) + bool *p_replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace); - if (replace && WARN_ON(!fib6_entry)) - return -EINVAL; + if (*p_replace && !fib6_entry) + *p_replace = false; if (fib6_entry) { list_add_tail(&new6_entry->common.list, @@ -5475,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) + bool *p_replace) { int err; - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); if (err) return err; @@ -5552,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt, bool replace) + struct fib6_info **rt_arr, + unsigned int nrt6, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; int err; if (mlxsw_sp->router->aborted) @@ -5580,19 +5610,21 @@ static int 
mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, */ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, + rt_arr, nrt6); if (err) goto err_fib6_entry_nexthop_add; return 0; } - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, + nrt6); if (IS_ERR(fib6_entry)) { err = PTR_ERR(fib6_entry); goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); if (err) goto err_fib6_node_entry_link; @@ -5609,10 +5641,12 @@ err_fib6_entry_nexthop_add: } static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt) + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; if (mlxsw_sp->router->aborted) return; @@ -5624,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(!fib6_entry)) return; - /* If route is part of a multipath entry, but not the last one - * removed, then only reduce its nexthop group. + /* If not all the nexthops are deleted, then only reduce the nexthop + * group. */ - if (!list_is_singular(&fib6_entry->rt6_list)) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + if (nrt6 != fib6_entry->nrt6) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, + nrt6); return; } @@ -5889,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } +struct mlxsw_sp_fib6_event_work { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; + struct mlxsw_sp_fib_event_work { struct work_struct work; union { - struct fib6_entry_notifier_info fen6_info; + struct mlxsw_sp_fib6_event_work fib6_work; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -5903,6 +5943,54 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; +static int +mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_work->rt_arr = rt_arr; + fib6_work->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void +mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +{ + int i; + + for (i = 0; i < fib6_work->nrt6; i++) + mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); + kfree(fib6_work->rt_arr); +} + static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = @@ -5961,18 +6049,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case 
FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fen6_info.rt, replace); + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6, + replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_del(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_RULE_ADD: /* if we get here, a rule was added that we do not support. @@ -6061,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, } } -static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, - struct fib_notifier_info *info) +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; + int err; switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - fib_work->fen6_info = *fen6_info; - fib6_info_hold(fib_work->fen6_info.rt); + err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, + fen6_info); + if (err) + return err; break; } + + return 0; } static void @@ -6185,6 +6280,20 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); return notifier_from_errno(-EINVAL); } + if (fen_info->fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + } else if (info->family == AF_INET6) { + struct fib6_entry_notifier_info *fen6_info; + + fen6_info = container_of(info, + struct fib6_entry_notifier_info, + info); + if (fen6_info->rt->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } } break; } @@ -6203,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; case AF_INET6: INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); - mlxsw_sp_router_fib6_event(fib_work, info); + err = mlxsw_sp_router_fib6_event(fib_work, info); + if (err) + goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: @@ -6215,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; + +err_fib_event: + kfree(fib_work); + return NOTIFY_BAD; } struct mlxsw_sp_rif * diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index fc4f19167262..bdab96f5bc70 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -299,6 +299,8 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info)) return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h 
b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 451216dd7f6b..19202bdb5105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -17,6 +17,8 @@ enum { MLXSW_TRAP_ID_MVRP = 0x15, MLXSW_TRAP_ID_RPVST = 0x16, MLXSW_TRAP_ID_DHCP = 0x19, + MLXSW_TRAP_ID_PTP0 = 0x28, + MLXSW_TRAP_ID_PTP1 = 0x29, MLXSW_TRAP_ID_IGMP_QUERY = 0x30, MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31, MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, @@ -76,6 +78,10 @@ enum { enum mlxsw_event_trap_id { /* Port Up/Down event generated by hardware */ MLXSW_TRAP_ID_PUDE = 0x8, + /* PTP Ingress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D, + /* PTP Egress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E, }; #endif /* _MLXSW_TRAP_H */ diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile index cb52a3b128ae..9a36c26095c8 100644 --- a/drivers/net/ethernet/mscc/Makefile +++ b/drivers/net/ethernet/mscc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR MIT) obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o mscc_ocelot_common-y := ocelot.o ocelot_io.o -mscc_ocelot_common-y += ocelot_regs.o +mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o ocelot_ace.o ocelot_flower.o obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 02ad11e0b0d8..b71e4ecbe469 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -22,6 +22,7 @@ #include <net/switchdev.h> #include "ocelot.h" +#include "ocelot_ace.h" #define TABLE_UPDATE_SLEEP_US 10 #define TABLE_UPDATE_TIMEOUT_US 100000 @@ -130,6 +131,13 @@ static void ocelot_mact_init(struct ocelot *ocelot) ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS); } +static void ocelot_vcap_enable(struct ocelot *ocelot, struct ocelot_port *port) +{ + ocelot_write_gix(ocelot, ANA_PORT_VCAP_S2_CFG_S2_ENA | + ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(0xa), + ANA_PORT_VCAP_S2_CFG, port->chip_port); +} + static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot) { return ocelot_read(ocelot, ANA_TABLES_VLANACCESS); @@ -884,6 +892,13 @@ static int ocelot_set_features(struct net_device *dev, struct ocelot_port *port = netdev_priv(dev); netdev_features_t changed = dev->features ^ features; + if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) && + port->tc.offload_cnt) { + netdev_err(dev, + "Cannot disable HW TC offload while offloads active\n"); + return -EBUSY; + } + if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) ocelot_vlan_mode(port, features); @@ -917,6 +932,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid, .ndo_set_features = ocelot_set_features, .ndo_get_port_parent_id = ocelot_get_port_parent_id, + .ndo_setup_tc = ocelot_setup_tc, }; static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data) @@ -1636,8 +1652,9 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port, dev->netdev_ops = &ocelot_port_netdev_ops; dev->ethtool_ops = &ocelot_ethtool_ops; - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS; - dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS | + NETIF_F_HW_TC; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN); dev->dev_addr[ETH_ALEN - 1] += port; @@ -1653,6 +1670,9 @@ int ocelot_probe_port(struct ocelot *ocelot, 
u8 port, /* Basic L2 initialization */ ocelot_vlan_port_apply(ocelot, ocelot_port); + /* Enable vcap lookups */ + ocelot_vcap_enable(ocelot, ocelot_port); + return 0; err_register_netdev: @@ -1687,6 +1707,7 @@ int ocelot_init(struct ocelot *ocelot) ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); + ocelot_ace_init(ocelot); for (port = 0; port < ocelot->num_phys_ports; port++) { /* Clear all counters (5 groups) */ @@ -1799,6 +1820,7 @@ void ocelot_deinit(struct ocelot *ocelot) { destroy_workqueue(ocelot->stats_queue); mutex_destroy(&ocelot->stats_lock); + ocelot_ace_deinit(); } EXPORT_SYMBOL(ocelot_deinit); diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 541fe41e60b0..f7eeb4806897 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -22,6 +22,7 @@ #include "ocelot_rew.h" #include "ocelot_sys.h" #include "ocelot_qs.h" +#include "ocelot_tc.h" #define PGID_AGGR 64 #define PGID_SRC 80 @@ -68,6 +69,7 @@ enum ocelot_target { QSYS, REW, SYS, + S2, HSIO, TARGET_MAX, }; @@ -334,6 +336,13 @@ enum ocelot_reg { SYS_CM_DATA_RD, SYS_CM_OP, SYS_CM_DATA, + S2_CORE_UPDATE_CTRL = S2 << TARGET_OFFSET, + S2_CORE_MV_CFG, + S2_CACHE_ENTRY_DAT, + S2_CACHE_MASK_DAT, + S2_CACHE_ACTION_DAT, + S2_CACHE_CNT_DAT, + S2_CACHE_TG_DAT, }; enum ocelot_regfield { @@ -454,6 +463,8 @@ struct ocelot_port { phy_interface_t phy_mode; struct phy *serdes; + + struct ocelot_port_tc tc; }; u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset); diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c new file mode 100644 index 000000000000..39aca1ab4687 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_ace.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#include <linux/iopoll.h> +#include <linux/proc_fs.h> + +#include "ocelot_ace.h" +#include "ocelot_vcap.h" +#include "ocelot_s2.h" + +#define OCELOT_POLICER_DISCARD 0x17f + +static struct ocelot_acl_block *acl_block; + +struct vcap_props { + const char *name; /* Symbolic name */ + u16 tg_width; /* Type-group width (in bits) */ + u16 sw_count; /* Sub word count */ + u16 entry_count; /* Entry count */ + u16 entry_words; /* Number of entry words */ + u16 entry_width; /* Entry width (in bits) */ + u16 action_count; /* Action count */ + u16 action_words; /* Number of action words */ + u16 action_width; /* Action width (in bits) */ + u16 action_type_width; /* Action type width (in bits) */ + struct { + u16 width; /* Action type width (in bits) */ + u16 count; /* Action type sub word count */ + } action_table[2]; + u16 counter_words; /* Number of counter words */ + u16 counter_width; /* Counter width (in bits) */ +}; + +#define ENTRY_WIDTH 32 +#define BITS_TO_32BIT(x) (1 + (((x) - 1) / ENTRY_WIDTH)) + +static const struct vcap_props vcap_is2 = { + .name = "IS2", + .tg_width = 2, + .sw_count = 4, + .entry_count = VCAP_IS2_CNT, + .entry_words = BITS_TO_32BIT(VCAP_IS2_ENTRY_WIDTH), + .entry_width = VCAP_IS2_ENTRY_WIDTH, + .action_count = (VCAP_IS2_CNT + VCAP_PORT_CNT + 2), + .action_words = BITS_TO_32BIT(VCAP_IS2_ACTION_WIDTH), + .action_width = (VCAP_IS2_ACTION_WIDTH), + .action_type_width = 1, + .action_table = { + { + .width = (IS2_AO_ACL_ID + IS2_AL_ACL_ID), + .count = 2 + }, + { + .width = 6, + .count = 4 + }, + }, + .counter_words = BITS_TO_32BIT(4 * ENTRY_WIDTH), + .counter_width = ENTRY_WIDTH, +}; + +enum vcap_sel { + VCAP_SEL_ENTRY = 0x1, + 
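+ /* These selector values are one-hot bits and may be OR'ed together; + * VCAP_SEL_ALL (0x7) addresses the entry, action and counter caches + * in a single command. + */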
VCAP_SEL_ACTION = 0x2, + VCAP_SEL_COUNTER = 0x4, + VCAP_SEL_ALL = 0x7, +}; + +enum vcap_cmd { + VCAP_CMD_WRITE = 0, /* Copy from Cache to TCAM */ + VCAP_CMD_READ = 1, /* Copy from TCAM to Cache */ + VCAP_CMD_MOVE_UP = 2, /* Move <count> up */ + VCAP_CMD_MOVE_DOWN = 3, /* Move <count> down */ + VCAP_CMD_INITIALIZE = 4, /* Write all (from cache) */ +}; + +#define VCAP_ENTRY_WIDTH 12 /* Max entry width (32bit words) */ +#define VCAP_COUNTER_WIDTH 4 /* Max counter width (32bit words) */ + +struct vcap_data { + u32 entry[VCAP_ENTRY_WIDTH]; /* ENTRY_DAT */ + u32 mask[VCAP_ENTRY_WIDTH]; /* MASK_DAT */ + u32 action[VCAP_ENTRY_WIDTH]; /* ACTION_DAT */ + u32 counter[VCAP_COUNTER_WIDTH]; /* CNT_DAT */ + u32 tg; /* TG_DAT */ + u32 type; /* Action type */ + u32 tg_sw; /* Current type-group */ + u32 cnt; /* Current counter */ + u32 key_offset; /* Current entry offset */ + u32 action_offset; /* Current action offset */ + u32 counter_offset; /* Current counter offset */ + u32 tg_value; /* Current type-group value */ + u32 tg_mask; /* Current type-group mask */ +}; + +static u32 vcap_s2_read_update_ctrl(struct ocelot *oc) +{ + return ocelot_read(oc, S2_CORE_UPDATE_CTRL); +} + +static void vcap_cmd(struct ocelot *oc, u16 ix, int cmd, int sel) +{ + u32 value = (S2_CORE_UPDATE_CTRL_UPDATE_CMD(cmd) | + S2_CORE_UPDATE_CTRL_UPDATE_ADDR(ix) | + S2_CORE_UPDATE_CTRL_UPDATE_SHOT); + + if ((sel & VCAP_SEL_ENTRY) && ix >= vcap_is2.entry_count) + return; + + if (!(sel & VCAP_SEL_ENTRY)) + value |= S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS; + + if (!(sel & VCAP_SEL_ACTION)) + value |= S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS; + + if (!(sel & VCAP_SEL_COUNTER)) + value |= S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS; + + ocelot_write(oc, value, S2_CORE_UPDATE_CTRL); + readx_poll_timeout(vcap_s2_read_update_ctrl, oc, value, + (value & S2_CORE_UPDATE_CTRL_UPDATE_SHOT) == 0, + 10, 100000); +} + +/* Convert from 0-based row to VCAP entry row and run command */ +static void vcap_row_cmd(struct ocelot *oc, u32 row, int cmd, int sel) +{ + vcap_cmd(oc, vcap_is2.entry_count - row - 1, cmd, sel); +} + +static void vcap_entry2cache(struct ocelot *oc, struct vcap_data *data) +{ + u32 i; + + for (i = 0; i < vcap_is2.entry_words; i++) { + ocelot_write_rix(oc, data->entry[i], S2_CACHE_ENTRY_DAT, i); + ocelot_write_rix(oc, ~data->mask[i], S2_CACHE_MASK_DAT, i); + } + ocelot_write(oc, data->tg, S2_CACHE_TG_DAT); +} + +static void vcap_cache2entry(struct ocelot *oc, struct vcap_data *data) +{ + u32 i; + + for (i = 0; i < vcap_is2.entry_words; i++) { + data->entry[i] = ocelot_read_rix(oc, S2_CACHE_ENTRY_DAT, i); + // Invert mask + data->mask[i] = ~ocelot_read_rix(oc, S2_CACHE_MASK_DAT, i); + } + data->tg = ocelot_read(oc, S2_CACHE_TG_DAT); +} + +static void vcap_action2cache(struct ocelot *oc, struct vcap_data *data) +{ + u32 i, width, mask; + + /* Encode action type */ + width = vcap_is2.action_type_width; + if (width) { + mask = GENMASK(width, 0); + data->action[0] = ((data->action[0] & ~mask) | data->type); + } + + for (i = 0; i < vcap_is2.action_words; i++) + ocelot_write_rix(oc, data->action[i], S2_CACHE_ACTION_DAT, i); + + for (i = 0; i < vcap_is2.counter_words; i++) + ocelot_write_rix(oc, data->counter[i], S2_CACHE_CNT_DAT, i); +} + +static void vcap_cache2action(struct ocelot *oc, struct vcap_data *data) +{ + u32 i, width; + + for (i = 0; i < vcap_is2.action_words; i++) + data->action[i] = ocelot_read_rix(oc, S2_CACHE_ACTION_DAT, i); + + for (i = 0; i < vcap_is2.counter_words; i++) + data->counter[i] = ocelot_read_rix(oc, S2_CACHE_CNT_DAT, i); + + 
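+ /* The S2 cache registers read above hold the result of the last + * VCAP_CMD_READ that the caller issued through vcap_row_cmd(). + */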
/* Extract action type */ + width = vcap_is2.action_type_width; + data->type = (width ? (data->action[0] & GENMASK(width, 0)) : 0); +} + +/* Calculate offsets for entry */ +static void is2_data_get(struct vcap_data *data, int ix) +{ + u32 i, col, offset, count, cnt, base, width = vcap_is2.tg_width; + + count = (data->tg_sw == VCAP_TG_HALF ? 2 : 4); + col = (ix % 2); + cnt = (vcap_is2.sw_count / count); + base = (vcap_is2.sw_count - col * cnt - cnt); + data->tg_value = 0; + data->tg_mask = 0; + for (i = 0; i < cnt; i++) { + offset = ((base + i) * width); + data->tg_value |= (data->tg_sw << offset); + data->tg_mask |= GENMASK(offset + width - 1, offset); + } + + /* Calculate key/action/counter offsets */ + col = (count - col - 1); + data->key_offset = (base * vcap_is2.entry_width) / vcap_is2.sw_count; + data->counter_offset = (cnt * col * vcap_is2.counter_width); + i = data->type; + width = vcap_is2.action_table[i].width; + cnt = vcap_is2.action_table[i].count; + data->action_offset = + (((cnt * col * width) / count) + vcap_is2.action_type_width); +} + +static void vcap_data_set(u32 *data, u32 offset, u32 len, u32 value) +{ + u32 i, v, m; + + for (i = 0; i < len; i++, offset++) { + v = data[offset / ENTRY_WIDTH]; + m = (1 << (offset % ENTRY_WIDTH)); + if (value & (1 << i)) + v |= m; + else + v &= ~m; + data[offset / ENTRY_WIDTH] = v; + } +} + +static u32 vcap_data_get(u32 *data, u32 offset, u32 len) +{ + u32 i, v, m, value = 0; + + for (i = 0; i < len; i++, offset++) { + v = data[offset / ENTRY_WIDTH]; + m = (1 << (offset % ENTRY_WIDTH)); + if (v & m) + value |= (1 << i); + } + return value; +} + +static void vcap_key_set(struct vcap_data *data, u32 offset, u32 width, + u32 value, u32 mask) +{ + vcap_data_set(data->entry, offset + data->key_offset, width, value); + vcap_data_set(data->mask, offset + data->key_offset, width, mask); +} + +static void vcap_key_bytes_set(struct vcap_data *data, u32 offset, u8 *val, + u8 *msk, u32 count) +{ + u32 i, j, n = 0, value = 0, mask = 0; + + /* Data wider than 32 bits are split up in chunks of maximum 32 bits. + * The 32 LSB of the data are written to the 32 MSB of the TCAM. + */ + offset += (count * 8); + for (i = 0; i < count; i++) { + j = (count - i - 1); + value += (val[j] << n); + mask += (msk[j] << n); + n += 8; + if (n == ENTRY_WIDTH || (i + 1) == count) { + offset -= n; + vcap_key_set(data, offset, n, value, mask); + n = 0; + value = 0; + mask = 0; + } + } +} + +static void vcap_key_l4_port_set(struct vcap_data *data, u32 offset, + struct ocelot_vcap_udp_tcp *port) +{ + vcap_key_set(data, offset, 16, port->value, port->mask); +} + +static void vcap_key_bit_set(struct vcap_data *data, u32 offset, + enum ocelot_vcap_bit val) +{ + vcap_key_set(data, offset, 1, val == OCELOT_VCAP_BIT_1 ? 1 : 0, + val == OCELOT_VCAP_BIT_ANY ? 
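+ /* This encodes a tri-state match: OCELOT_VCAP_BIT_ANY gets mask 0 + * (don't care), while BIT_0/BIT_1 get mask 1 with the value bit + * cleared or set. + */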
0 : 1); +} + +#define VCAP_KEY_SET(fld, val, msk) \ + vcap_key_set(&data, IS2_HKO_##fld, IS2_HKL_##fld, val, msk) +#define VCAP_KEY_ANY_SET(fld) \ + vcap_key_set(&data, IS2_HKO_##fld, IS2_HKL_##fld, 0, 0) +#define VCAP_KEY_BIT_SET(fld, val) vcap_key_bit_set(&data, IS2_HKO_##fld, val) +#define VCAP_KEY_BYTES_SET(fld, val, msk) \ + vcap_key_bytes_set(&data, IS2_HKO_##fld, val, msk, IS2_HKL_##fld / 8) + +static void vcap_action_set(struct vcap_data *data, u32 offset, u32 width, + u32 value) +{ + vcap_data_set(data->action, offset + data->action_offset, width, value); +} + +#define VCAP_ACT_SET(fld, val) \ + vcap_action_set(data, IS2_AO_##fld, IS2_AL_##fld, val) + +static void is2_action_set(struct vcap_data *data, + enum ocelot_ace_action action) +{ + switch (action) { + case OCELOT_ACL_ACTION_DROP: + VCAP_ACT_SET(PORT_MASK, 0x0); + VCAP_ACT_SET(MASK_MODE, 0x1); + VCAP_ACT_SET(POLICE_ENA, 0x1); + VCAP_ACT_SET(POLICE_IDX, OCELOT_POLICER_DISCARD); + VCAP_ACT_SET(CPU_QU_NUM, 0x0); + VCAP_ACT_SET(CPU_COPY_ENA, 0x0); + break; + case OCELOT_ACL_ACTION_TRAP: + VCAP_ACT_SET(PORT_MASK, 0x0); + VCAP_ACT_SET(MASK_MODE, 0x0); + VCAP_ACT_SET(POLICE_ENA, 0x0); + VCAP_ACT_SET(POLICE_IDX, 0x0); + VCAP_ACT_SET(CPU_QU_NUM, 0x0); + VCAP_ACT_SET(CPU_COPY_ENA, 0x1); + break; + } +} + +static void is2_entry_set(struct ocelot *ocelot, int ix, + struct ocelot_ace_rule *ace) +{ + u32 val, msk, type, type_mask = 0xf, i, count; + struct ocelot_ace_vlan *tag = &ace->vlan; + struct ocelot_vcap_u64 payload; + struct vcap_data data; + int row = (ix / 2); + + memset(&payload, 0, sizeof(payload)); + memset(&data, 0, sizeof(data)); + + /* Read row */ + vcap_row_cmd(ocelot, row, VCAP_CMD_READ, VCAP_SEL_ALL); + vcap_cache2entry(ocelot, &data); + vcap_cache2action(ocelot, &data); + + data.tg_sw = VCAP_TG_HALF; + is2_data_get(&data, ix); + data.tg = (data.tg & ~data.tg_mask); + if (ace->prio != 0) + data.tg |= data.tg_value; + + data.type = IS2_ACTION_TYPE_NORMAL; + + VCAP_KEY_ANY_SET(PAG); + VCAP_KEY_SET(IGR_PORT_MASK, 0, ~BIT(ace->chip_port)); + VCAP_KEY_BIT_SET(FIRST, OCELOT_VCAP_BIT_1); + VCAP_KEY_BIT_SET(HOST_MATCH, OCELOT_VCAP_BIT_ANY); + VCAP_KEY_BIT_SET(L2_MC, ace->dmac_mc); + VCAP_KEY_BIT_SET(L2_BC, ace->dmac_bc); + VCAP_KEY_BIT_SET(VLAN_TAGGED, tag->tagged); + VCAP_KEY_SET(VID, tag->vid.value, tag->vid.mask); + VCAP_KEY_SET(PCP, tag->pcp.value[0], tag->pcp.mask[0]); + VCAP_KEY_BIT_SET(DEI, tag->dei); + + switch (ace->type) { + case OCELOT_ACE_TYPE_ETYPE: { + struct ocelot_ace_frame_etype *etype = &ace->frame.etype; + + type = IS2_TYPE_ETYPE; + VCAP_KEY_BYTES_SET(L2_DMAC, etype->dmac.value, + etype->dmac.mask); + VCAP_KEY_BYTES_SET(L2_SMAC, etype->smac.value, + etype->smac.mask); + VCAP_KEY_BYTES_SET(MAC_ETYPE_ETYPE, etype->etype.value, + etype->etype.mask); + VCAP_KEY_ANY_SET(MAC_ETYPE_L2_PAYLOAD); // Clear unused bits + vcap_key_bytes_set(&data, IS2_HKO_MAC_ETYPE_L2_PAYLOAD, + etype->data.value, etype->data.mask, 2); + break; + } + case OCELOT_ACE_TYPE_LLC: { + struct ocelot_ace_frame_llc *llc = &ace->frame.llc; + + type = IS2_TYPE_LLC; + VCAP_KEY_BYTES_SET(L2_DMAC, llc->dmac.value, llc->dmac.mask); + VCAP_KEY_BYTES_SET(L2_SMAC, llc->smac.value, llc->smac.mask); + for (i = 0; i < 4; i++) { + payload.value[i] = llc->llc.value[i]; + payload.mask[i] = llc->llc.mask[i]; + } + VCAP_KEY_BYTES_SET(MAC_LLC_L2_LLC, payload.value, payload.mask); + break; + } + case OCELOT_ACE_TYPE_SNAP: { + struct ocelot_ace_frame_snap *snap = &ace->frame.snap; + + type = IS2_TYPE_SNAP; + VCAP_KEY_BYTES_SET(L2_DMAC, snap->dmac.value, 
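+ /* The VCAP_KEY_*_SET() wrappers above derive each field's offset + * (IS2_HKO_*) and width (IS2_HKL_*) from ocelot_vcap.h, so call + * sites only supply the value/mask pair. + */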
snap->dmac.mask); + VCAP_KEY_BYTES_SET(L2_SMAC, snap->smac.value, snap->smac.mask); + VCAP_KEY_BYTES_SET(MAC_SNAP_L2_SNAP, + ace->frame.snap.snap.value, + ace->frame.snap.snap.mask); + break; + } + case OCELOT_ACE_TYPE_ARP: { + struct ocelot_ace_frame_arp *arp = &ace->frame.arp; + + type = IS2_TYPE_ARP; + VCAP_KEY_BYTES_SET(MAC_ARP_L2_SMAC, arp->smac.value, + arp->smac.mask); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_ADDR_SPACE_OK, arp->ethernet); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_PROTO_SPACE_OK, arp->ip); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_LEN_OK, arp->length); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_TGT_MATCH, arp->dmac_match); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_SENDER_MATCH, arp->smac_match); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_OPCODE_UNKNOWN, arp->unknown); + + /* OPCODE is inverse, bit 0 is reply flag, bit 1 is RARP flag */ + val = ((arp->req == OCELOT_VCAP_BIT_0 ? 1 : 0) | + (arp->arp == OCELOT_VCAP_BIT_0 ? 2 : 0)); + msk = ((arp->req == OCELOT_VCAP_BIT_ANY ? 0 : 1) | + (arp->arp == OCELOT_VCAP_BIT_ANY ? 0 : 2)); + VCAP_KEY_SET(MAC_ARP_ARP_OPCODE, val, msk); + vcap_key_bytes_set(&data, IS2_HKO_MAC_ARP_L3_IP4_DIP, + arp->dip.value.addr, arp->dip.mask.addr, 4); + vcap_key_bytes_set(&data, IS2_HKO_MAC_ARP_L3_IP4_SIP, + arp->sip.value.addr, arp->sip.mask.addr, 4); + VCAP_KEY_ANY_SET(MAC_ARP_DIP_EQ_SIP); + break; + } + case OCELOT_ACE_TYPE_IPV4: + case OCELOT_ACE_TYPE_IPV6: { + enum ocelot_vcap_bit sip_eq_dip, sport_eq_dport, seq_zero, tcp; + enum ocelot_vcap_bit ttl, fragment, options, tcp_ack, tcp_urg; + enum ocelot_vcap_bit tcp_fin, tcp_syn, tcp_rst, tcp_psh; + struct ocelot_ace_frame_ipv4 *ipv4 = NULL; + struct ocelot_ace_frame_ipv6 *ipv6 = NULL; + struct ocelot_vcap_udp_tcp *sport, *dport; + struct ocelot_vcap_ipv4 sip, dip; + struct ocelot_vcap_u8 proto, ds; + struct ocelot_vcap_u48 *ip_data; + + if (ace->type == OCELOT_ACE_TYPE_IPV4) { + ipv4 = &ace->frame.ipv4; + ttl = ipv4->ttl; + fragment = ipv4->fragment; + options = ipv4->options; + proto = ipv4->proto; + ds = ipv4->ds; + ip_data = &ipv4->data; + sip = ipv4->sip; + dip = ipv4->dip; + sport = &ipv4->sport; + dport = &ipv4->dport; + tcp_fin = ipv4->tcp_fin; + tcp_syn = ipv4->tcp_syn; + tcp_rst = ipv4->tcp_rst; + tcp_psh = ipv4->tcp_psh; + tcp_ack = ipv4->tcp_ack; + tcp_urg = ipv4->tcp_urg; + sip_eq_dip = ipv4->sip_eq_dip; + sport_eq_dport = ipv4->sport_eq_dport; + seq_zero = ipv4->seq_zero; + } else { + ipv6 = &ace->frame.ipv6; + ttl = ipv6->ttl; + fragment = OCELOT_VCAP_BIT_ANY; + options = OCELOT_VCAP_BIT_ANY; + proto = ipv6->proto; + ds = ipv6->ds; + ip_data = &ipv6->data; + for (i = 0; i < 8; i++) { + val = ipv6->sip.value[i + 8]; + msk = ipv6->sip.mask[i + 8]; + if (i < 4) { + dip.value.addr[i] = val; + dip.mask.addr[i] = msk; + } else { + sip.value.addr[i - 4] = val; + sip.mask.addr[i - 4] = msk; + } + } + sport = &ipv6->sport; + dport = &ipv6->dport; + tcp_fin = ipv6->tcp_fin; + tcp_syn = ipv6->tcp_syn; + tcp_rst = ipv6->tcp_rst; + tcp_psh = ipv6->tcp_psh; + tcp_ack = ipv6->tcp_ack; + tcp_urg = ipv6->tcp_urg; + sip_eq_dip = ipv6->sip_eq_dip; + sport_eq_dport = ipv6->sport_eq_dport; + seq_zero = ipv6->seq_zero; + } + + VCAP_KEY_BIT_SET(IP4, + ipv4 ? 
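+ /* Only the low 64 bits of an IPv6 source address are matched: the + * loop above folded bytes 8-11 into the IPv4 "dip" field and bytes + * 12-15 into "sip". + */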
OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); + VCAP_KEY_BIT_SET(L3_FRAGMENT, fragment); + VCAP_KEY_ANY_SET(L3_FRAG_OFS_GT0); + VCAP_KEY_BIT_SET(L3_OPTIONS, options); + VCAP_KEY_BIT_SET(L3_TTL_GT0, ttl); + VCAP_KEY_BYTES_SET(L3_TOS, ds.value, ds.mask); + vcap_key_bytes_set(&data, IS2_HKO_L3_IP4_DIP, dip.value.addr, + dip.mask.addr, 4); + vcap_key_bytes_set(&data, IS2_HKO_L3_IP4_SIP, sip.value.addr, + sip.mask.addr, 4); + VCAP_KEY_BIT_SET(DIP_EQ_SIP, sip_eq_dip); + val = proto.value[0]; + msk = proto.mask[0]; + type = IS2_TYPE_IP_UDP_TCP; + if (msk == 0xff && (val == 6 || val == 17)) { + /* UDP/TCP protocol match */ + tcp = (val == 6 ? + OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_TCP, tcp); + vcap_key_l4_port_set(&data, + IS2_HKO_IP4_TCP_UDP_L4_DPORT, + dport); + vcap_key_l4_port_set(&data, + IS2_HKO_IP4_TCP_UDP_L4_SPORT, + sport); + VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_RNG); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_SPORT_EQ_DPORT, + sport_eq_dport); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_SEQUENCE_EQ0, seq_zero); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_FIN, tcp_fin); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_SYN, tcp_syn); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_RST, tcp_rst); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_PSH, tcp_psh); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_ACK, tcp_ack); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_URG, tcp_urg); + VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_1588_DOM); + VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_1588_VER); + } else { + if (msk == 0) { + /* Any IP protocol match */ + type_mask = IS2_TYPE_MASK_IP_ANY; + } else { + /* Non-UDP/TCP protocol match */ + type = IS2_TYPE_IP_OTHER; + for (i = 0; i < 6; i++) { + payload.value[i] = ip_data->value[i]; + payload.mask[i] = ip_data->mask[i]; + } + } + VCAP_KEY_BYTES_SET(IP4_OTHER_L3_PROTO, proto.value, + proto.mask); + VCAP_KEY_BYTES_SET(IP4_OTHER_L3_PAYLOAD, payload.value, + payload.mask); + } + break; + } + case OCELOT_ACE_TYPE_ANY: + default: + type = 0; + type_mask = 0; + count = (vcap_is2.entry_width / 2); + for (i = (IS2_HKO_PCP + IS2_HKL_PCP); i < count; + i += ENTRY_WIDTH) { + /* Clear entry data */ + vcap_key_set(&data, i, min(32u, count - i), 0, 0); + } + break; + } + + VCAP_KEY_SET(TYPE, type, type_mask); + is2_action_set(&data, ace->action); + vcap_data_set(data.counter, data.counter_offset, vcap_is2.counter_width, + ace->stats.pkts); + + /* Write row */ + vcap_entry2cache(ocelot, &data); + vcap_action2cache(ocelot, &data); + vcap_row_cmd(ocelot, row, VCAP_CMD_WRITE, VCAP_SEL_ALL); +} + +static void is2_entry_get(struct ocelot_ace_rule *rule, int ix) +{ + struct ocelot *op = rule->port->ocelot; + struct vcap_data data; + int row = (ix / 2); + u32 cnt; + + vcap_row_cmd(op, row, VCAP_CMD_READ, VCAP_SEL_COUNTER); + vcap_cache2action(op, &data); + data.tg_sw = VCAP_TG_HALF; + is2_data_get(&data, ix); + cnt = vcap_data_get(data.counter, data.counter_offset, + vcap_is2.counter_width); + + rule->stats.pkts = cnt; +} + +static void ocelot_ace_rule_add(struct ocelot_acl_block *block, + struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + struct list_head *pos, *n; + + block->count++; + + if (list_empty(&block->rules)) { + list_add(&rule->list, &block->rules); + return; + } + + list_for_each_safe(pos, n, &block->rules) { + tmp = list_entry(pos, struct ocelot_ace_rule, list); + if (rule->prio < tmp->prio) + break; + } + list_add(&rule->list, pos->prev); +} + +static int ocelot_ace_rule_get_index_id(struct ocelot_acl_block *block, + struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + int index = -1; + + list_for_each_entry(tmp, &block->rules, 
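+ /* The rule list is kept sorted by priority (see + * ocelot_ace_rule_add), so a rule's position in the list is also + * its entry position in the IS2 TCAM. + */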
list) { + ++index; + if (rule->id == tmp->id) + break; + } + return index; +} + +static struct ocelot_ace_rule* +ocelot_ace_rule_get_rule_index(struct ocelot_acl_block *block, int index) +{ + struct ocelot_ace_rule *tmp; + int i = 0; + + list_for_each_entry(tmp, &block->rules, list) { + if (i == index) + return tmp; + ++i; + } + + return NULL; +} + +int ocelot_ace_rule_offload_add(struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *ace; + int i, index; + + /* Add rule to the linked list */ + ocelot_ace_rule_add(acl_block, rule); + + /* Get the index of the inserted rule */ + index = ocelot_ace_rule_get_index_id(acl_block, rule); + + /* Move down the rules to make room for the new rule */ + for (i = acl_block->count - 1; i > index; i--) { + ace = ocelot_ace_rule_get_rule_index(acl_block, i); + is2_entry_set(rule->port->ocelot, i, ace); + } + + /* Now insert the new rule */ + is2_entry_set(rule->port->ocelot, index, rule); + return 0; +} + +static void ocelot_ace_rule_del(struct ocelot_acl_block *block, + struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + struct list_head *pos, *q; + + list_for_each_safe(pos, q, &block->rules) { + tmp = list_entry(pos, struct ocelot_ace_rule, list); + if (tmp->id == rule->id) { + list_del(pos); + kfree(tmp); + } + } + + block->count--; +} + +int ocelot_ace_rule_offload_del(struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule del_ace; + struct ocelot_ace_rule *ace; + int i, index; + + memset(&del_ace, 0, sizeof(del_ace)); + + /* Get the index of the rule */ + index = ocelot_ace_rule_get_index_id(acl_block, rule); + + /* Delete the rule */ + ocelot_ace_rule_del(acl_block, rule); + + /* Move up all the rules above the deleted rule */ + for (i = index; i < acl_block->count; i++) { + ace = ocelot_ace_rule_get_rule_index(acl_block, i); + is2_entry_set(rule->port->ocelot, i, ace); + } + + /* Now delete the last rule, because it is duplicated */ + is2_entry_set(rule->port->ocelot, acl_block->count, &del_ace); + + return 0; +} + +int ocelot_ace_rule_stats_update(struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + int index; + + index = ocelot_ace_rule_get_index_id(acl_block, rule); + is2_entry_get(rule, index); + + /* After we get the result we need to clear the counters */ + tmp = ocelot_ace_rule_get_rule_index(acl_block, index); + tmp->stats.pkts = 0; + is2_entry_set(rule->port->ocelot, index, tmp); + + return 0; +} + +static struct ocelot_acl_block *ocelot_acl_block_create(struct ocelot *ocelot) +{ + struct ocelot_acl_block *block; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return NULL; + + INIT_LIST_HEAD(&block->rules); + block->count = 0; + block->ocelot = ocelot; + + return block; +} + +static void ocelot_acl_block_destroy(struct ocelot_acl_block *block) +{ + kfree(block); +} + +int ocelot_ace_init(struct ocelot *ocelot) +{ + struct vcap_data data; + + memset(&data, 0, sizeof(data)); + vcap_entry2cache(ocelot, &data); + ocelot_write(ocelot, vcap_is2.entry_count, S2_CORE_MV_CFG); + vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE, VCAP_SEL_ENTRY); + + vcap_action2cache(ocelot, &data); + ocelot_write(ocelot, vcap_is2.action_count, S2_CORE_MV_CFG); + vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE, + VCAP_SEL_ACTION | VCAP_SEL_COUNTER); + + /* Create a policer that will drop frames destined for the CPU. + * This policer is used as the action in ACL rules that drop + * frames.
+ */ + ocelot_write_gix(ocelot, 0x299, ANA_POL_MODE_CFG, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x1, ANA_POL_PIR_CFG, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x3fffff, ANA_POL_PIR_STATE, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x0, ANA_POL_CIR_CFG, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x3fffff, ANA_POL_CIR_STATE, + OCELOT_POLICER_DISCARD); + + acl_block = ocelot_acl_block_create(ocelot); + + return 0; +} + +void ocelot_ace_deinit(void) +{ + ocelot_acl_block_destroy(acl_block); +} diff --git a/drivers/net/ethernet/mscc/ocelot_ace.h b/drivers/net/ethernet/mscc/ocelot_ace.h new file mode 100644 index 000000000000..e98944c87259 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_ace.h @@ -0,0 +1,232 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _MSCC_OCELOT_ACE_H_ +#define _MSCC_OCELOT_ACE_H_ + +#include "ocelot.h" +#include <net/sch_generic.h> +#include <net/pkt_cls.h> + +struct ocelot_ipv4 { + u8 addr[4]; +}; + +enum ocelot_vcap_bit { + OCELOT_VCAP_BIT_ANY, + OCELOT_VCAP_BIT_0, + OCELOT_VCAP_BIT_1 +}; + +struct ocelot_vcap_u8 { + u8 value[1]; + u8 mask[1]; +}; + +struct ocelot_vcap_u16 { + u8 value[2]; + u8 mask[2]; +}; + +struct ocelot_vcap_u24 { + u8 value[3]; + u8 mask[3]; +}; + +struct ocelot_vcap_u32 { + u8 value[4]; + u8 mask[4]; +}; + +struct ocelot_vcap_u40 { + u8 value[5]; + u8 mask[5]; +}; + +struct ocelot_vcap_u48 { + u8 value[6]; + u8 mask[6]; +}; + +struct ocelot_vcap_u64 { + u8 value[8]; + u8 mask[8]; +}; + +struct ocelot_vcap_u128 { + u8 value[16]; + u8 mask[16]; +}; + +struct ocelot_vcap_vid { + u16 value; + u16 mask; +}; + +struct ocelot_vcap_ipv4 { + struct ocelot_ipv4 value; + struct ocelot_ipv4 mask; +}; + +struct ocelot_vcap_udp_tcp { + u16 value; + u16 mask; +}; + +enum ocelot_ace_type { + OCELOT_ACE_TYPE_ANY, + OCELOT_ACE_TYPE_ETYPE, + OCELOT_ACE_TYPE_LLC, + OCELOT_ACE_TYPE_SNAP, + OCELOT_ACE_TYPE_ARP, + OCELOT_ACE_TYPE_IPV4, + OCELOT_ACE_TYPE_IPV6 +}; + +struct ocelot_ace_vlan { + struct ocelot_vcap_vid vid; /* VLAN ID (12 bit) */ + struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ + enum ocelot_vcap_bit dei; /* DEI */ + enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ +}; + +struct ocelot_ace_frame_etype { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + struct ocelot_vcap_u16 etype; + struct ocelot_vcap_u16 data; /* MAC data */ +}; + +struct ocelot_ace_frame_llc { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + + /* LLC header: DSAP at byte 0, SSAP at byte 1, Control at byte 2 */ + struct ocelot_vcap_u32 llc; +}; + +struct ocelot_ace_frame_snap { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + + /* SNAP header: Organization Code at byte 0, Type at byte 3 */ + struct ocelot_vcap_u40 snap; +}; + +struct ocelot_ace_frame_arp { + struct ocelot_vcap_u48 smac; + enum ocelot_vcap_bit arp; /* Opcode ARP/RARP */ + enum ocelot_vcap_bit req; /* Opcode request/reply */ + enum ocelot_vcap_bit unknown; /* Opcode unknown */ + enum ocelot_vcap_bit smac_match; /* Sender MAC matches SMAC */ + enum ocelot_vcap_bit dmac_match; /* Target MAC matches DMAC */ + + /**< Protocol addr. 
length 4, hardware length 6 */ + enum ocelot_vcap_bit length; + + enum ocelot_vcap_bit ip; /* Protocol address type IP */ + enum ocelot_vcap_bit ethernet; /* Hardware address type Ethernet */ + struct ocelot_vcap_ipv4 sip; /* Sender IP address */ + struct ocelot_vcap_ipv4 dip; /* Target IP address */ +}; + +struct ocelot_ace_frame_ipv4 { + enum ocelot_vcap_bit ttl; /* TTL zero */ + enum ocelot_vcap_bit fragment; /* Fragment */ + enum ocelot_vcap_bit options; /* Header options */ + struct ocelot_vcap_u8 ds; + struct ocelot_vcap_u8 proto; /* Protocol */ + struct ocelot_vcap_ipv4 sip; /* Source IP address */ + struct ocelot_vcap_ipv4 dip; /* Destination IP address */ + struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ + struct ocelot_vcap_udp_tcp sport; /* UDP/TCP: Source port */ + struct ocelot_vcap_udp_tcp dport; /* UDP/TCP: Destination port */ + enum ocelot_vcap_bit tcp_fin; + enum ocelot_vcap_bit tcp_syn; + enum ocelot_vcap_bit tcp_rst; + enum ocelot_vcap_bit tcp_psh; + enum ocelot_vcap_bit tcp_ack; + enum ocelot_vcap_bit tcp_urg; + enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ + enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ + enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ +}; + +struct ocelot_ace_frame_ipv6 { + struct ocelot_vcap_u8 proto; /* IPv6 protocol */ + struct ocelot_vcap_u128 sip; /* IPv6 source (byte 0-7 ignored) */ + enum ocelot_vcap_bit ttl; /* TTL zero */ + struct ocelot_vcap_u8 ds; + struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ + struct ocelot_vcap_udp_tcp sport; + struct ocelot_vcap_udp_tcp dport; + enum ocelot_vcap_bit tcp_fin; + enum ocelot_vcap_bit tcp_syn; + enum ocelot_vcap_bit tcp_rst; + enum ocelot_vcap_bit tcp_psh; + enum ocelot_vcap_bit tcp_ack; + enum ocelot_vcap_bit tcp_urg; + enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ + enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ + enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ +}; + +enum ocelot_ace_action { + OCELOT_ACL_ACTION_DROP, + OCELOT_ACL_ACTION_TRAP, +}; + +struct ocelot_ace_stats { + u64 bytes; + u64 pkts; + u64 used; +}; + +struct ocelot_ace_rule { + struct list_head list; + struct ocelot_port *port; + + u16 prio; + u32 id; + + enum ocelot_ace_action action; + struct ocelot_ace_stats stats; + int chip_port; + + enum ocelot_vcap_bit dmac_mc; + enum ocelot_vcap_bit dmac_bc; + struct ocelot_ace_vlan vlan; + + enum ocelot_ace_type type; + union { + /* OCELOT_ACE_TYPE_ANY: No specific fields */ + struct ocelot_ace_frame_etype etype; + struct ocelot_ace_frame_llc llc; + struct ocelot_ace_frame_snap snap; + struct ocelot_ace_frame_arp arp; + struct ocelot_ace_frame_ipv4 ipv4; + struct ocelot_ace_frame_ipv6 ipv6; + } frame; +}; + +struct ocelot_acl_block { + struct list_head rules; + struct ocelot *ocelot; + int count; +}; + +int ocelot_ace_rule_offload_add(struct ocelot_ace_rule *rule); +int ocelot_ace_rule_offload_del(struct ocelot_ace_rule *rule); +int ocelot_ace_rule_stats_update(struct ocelot_ace_rule *rule); + +int ocelot_ace_init(struct ocelot *ocelot); +void ocelot_ace_deinit(void); + +int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port, + struct flow_block_offload *f); +void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port, + struct flow_block_offload *f); + +#endif /* _MSCC_OCELOT_ACE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index e7f90101d2e0..58bde1a9eacb 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ 
b/drivers/net/ethernet/mscc/ocelot_board.c @@ -188,6 +188,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) { QSYS, "qsys" }, { ANA, "ana" }, { QS, "qs" }, + { S2, "s2" }, }; if (!np && !pdev->dev.platform_data) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c new file mode 100644 index 000000000000..7aaddc09c185 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> + +#include "ocelot_ace.h" + +struct ocelot_port_block { + struct ocelot_acl_block *block; + struct ocelot_port *port; +}; + +static u16 get_prio(u32 prio) +{ + /* prio starts from 0x10000 while the ids start from 0 */ + return prio >> 16; +} + +static int ocelot_flower_parse_action(struct flow_cls_offload *f, + struct ocelot_ace_rule *rule) +{ + const struct flow_action_entry *a; + int i; + + if (f->rule->action.num_entries != 1) + return -EOPNOTSUPP; + + flow_action_for_each(i, a, &f->rule->action) { + switch (a->id) { + case FLOW_ACTION_DROP: + rule->action = OCELOT_ACL_ACTION_DROP; + break; + case FLOW_ACTION_TRAP: + rule->action = OCELOT_ACL_ACTION_TRAP; + break; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int ocelot_flower_parse(struct flow_cls_offload *f, + struct ocelot_ace_rule *ocelot_rule) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS))) { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + u16 proto = ntohs(f->common.protocol); + + /* The hardware supports MAC matches only for the MAC_ETYPE key, + * so if any other matches (port, TCP flags, etc.) are added, + * just bail out + */ + if ((dissector->used_keys & + (BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL))) != + (BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL))) + return -EOPNOTSUPP; + + if (proto == ETH_P_IP || + proto == ETH_P_IPV6 || + proto == ETH_P_ARP) + return -EOPNOTSUPP; + + flow_rule_match_eth_addrs(rule, &match); + ocelot_rule->type = OCELOT_ACE_TYPE_ETYPE; + ether_addr_copy(ocelot_rule->frame.etype.dmac.value, + match.key->dst); + ether_addr_copy(ocelot_rule->frame.etype.smac.value, + match.key->src); + ether_addr_copy(ocelot_rule->frame.etype.dmac.mask, + match.mask->dst); + ether_addr_copy(ocelot_rule->frame.etype.smac.mask, + match.mask->src); + goto finished_key_parsing; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + if (ntohs(match.key->n_proto) == ETH_P_IP) { + ocelot_rule->type = OCELOT_ACE_TYPE_IPV4; + ocelot_rule->frame.ipv4.proto.value[0] = + match.key->ip_proto; + ocelot_rule->frame.ipv4.proto.mask[0] = + match.mask->ip_proto; + } + if (ntohs(match.key->n_proto) ==
ETH_P_IPV6) { + ocelot_rule->type = OCELOT_ACE_TYPE_IPV6; + ocelot_rule->frame.ipv6.proto.value[0] = + match.key->ip_proto; + ocelot_rule->frame.ipv6.proto.mask[0] = + match.mask->ip_proto; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) && + ntohs(f->common.protocol) == ETH_P_IP) { + struct flow_match_ipv4_addrs match; + u8 *tmp; + + flow_rule_match_ipv4_addrs(rule, &match); + tmp = &ocelot_rule->frame.ipv4.sip.value.addr[0]; + memcpy(tmp, &match.key->src, 4); + + tmp = &ocelot_rule->frame.ipv4.sip.mask.addr[0]; + memcpy(tmp, &match.mask->src, 4); + + tmp = &ocelot_rule->frame.ipv4.dip.value.addr[0]; + memcpy(tmp, &match.key->dst, 4); + + tmp = &ocelot_rule->frame.ipv4.dip.mask.addr[0]; + memcpy(tmp, &match.mask->dst, 4); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) && + ntohs(f->common.protocol) == ETH_P_IPV6) { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + ocelot_rule->frame.ipv4.sport.value = ntohs(match.key->src); + ocelot_rule->frame.ipv4.sport.mask = ntohs(match.mask->src); + ocelot_rule->frame.ipv4.dport.value = ntohs(match.key->dst); + ocelot_rule->frame.ipv4.dport.mask = ntohs(match.mask->dst); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + ocelot_rule->type = OCELOT_ACE_TYPE_ANY; + ocelot_rule->vlan.vid.value = match.key->vlan_id; + ocelot_rule->vlan.vid.mask = match.mask->vlan_id; + ocelot_rule->vlan.pcp.value[0] = match.key->vlan_priority; + ocelot_rule->vlan.pcp.mask[0] = match.mask->vlan_priority; + } + +finished_key_parsing: + ocelot_rule->prio = get_prio(f->common.prio); + ocelot_rule->id = f->cookie; + return ocelot_flower_parse_action(f, ocelot_rule); +} + +static +struct ocelot_ace_rule *ocelot_ace_rule_create(struct flow_cls_offload *f, + struct ocelot_port_block *block) +{ + struct ocelot_ace_rule *rule; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->port = block->port; + rule->chip_port = block->port->chip_port; + return rule; +} + +static int ocelot_flower_replace(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + struct ocelot_ace_rule *rule; + int ret; + + rule = ocelot_ace_rule_create(f, port_block); + if (!rule) + return -ENOMEM; + + ret = ocelot_flower_parse(f, rule); + if (ret) { + kfree(rule); + return ret; + } + + ret = ocelot_ace_rule_offload_add(rule); + if (ret) + return ret; + + port_block->port->tc.offload_cnt++; + return 0; +} + +static int ocelot_flower_destroy(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + struct ocelot_ace_rule rule; + int ret; + + rule.prio = get_prio(f->common.prio); + rule.port = port_block->port; + rule.id = f->cookie; + + ret = ocelot_ace_rule_offload_del(&rule); + if (ret) + return ret; + + port_block->port->tc.offload_cnt--; + return 0; +} + +static int ocelot_flower_stats_update(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + struct ocelot_ace_rule rule; + int ret; + + rule.prio = get_prio(f->common.prio); + rule.port = port_block->port; + rule.id = f->cookie; + ret = ocelot_ace_rule_stats_update(&rule); + if (ret) + return ret; + + flow_stats_update(&f->stats, 0x0, rule.stats.pkts, 0x0); + return 0; +} + +static int ocelot_setup_tc_cls_flower(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + switch (f->command) { + case 
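+ /* A rule is identified by its tc cookie and priority; REPLACE, + * DESTROY and STATS map onto ocelot_ace_rule_offload_add(), + * ocelot_ace_rule_offload_del() and ocelot_ace_rule_stats_update(). + */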
FLOW_CLS_REPLACE: + return ocelot_flower_replace(f, port_block); + case FLOW_CLS_DESTROY: + return ocelot_flower_destroy(f, port_block); + case FLOW_CLS_STATS: + return ocelot_flower_stats_update(f, port_block); + default: + return -EOPNOTSUPP; + } +} + +static int ocelot_setup_tc_block_cb_flower(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct ocelot_port_block *port_block = cb_priv; + + if (!tc_cls_can_offload_and_chain0(port_block->port->dev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ocelot_setup_tc_cls_flower(type_data, cb_priv); + case TC_SETUP_CLSMATCHALL: + return 0; + default: + return -EOPNOTSUPP; + } +} + +static struct ocelot_port_block* +ocelot_port_block_create(struct ocelot_port *port) +{ + struct ocelot_port_block *port_block; + + port_block = kzalloc(sizeof(*port_block), GFP_KERNEL); + if (!port_block) + return NULL; + + port_block->port = port; + + return port_block; +} + +static void ocelot_port_block_destroy(struct ocelot_port_block *block) +{ + kfree(block); +} + +static void ocelot_tc_block_unbind(void *cb_priv) +{ + struct ocelot_port_block *port_block = cb_priv; + + ocelot_port_block_destroy(port_block); +} + +int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port, + struct flow_block_offload *f) +{ + struct ocelot_port_block *port_block; + struct flow_block_cb *block_cb; + int ret; + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + return -EOPNOTSUPP; + + block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower, + port); + if (!block_cb) { + port_block = ocelot_port_block_create(port); + if (!port_block) + return -ENOMEM; + + block_cb = flow_block_cb_alloc(f->net, + ocelot_setup_tc_block_cb_flower, + port, port_block, + ocelot_tc_block_unbind); + if (IS_ERR(block_cb)) { + ret = PTR_ERR(block_cb); + goto err_cb_register; + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, f->driver_block_list); + } else { + port_block = flow_block_cb_priv(block_cb); + } + + flow_block_cb_incref(block_cb); + return 0; + +err_cb_register: + ocelot_port_block_destroy(port_block); + + return ret; +} + +void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port, + struct flow_block_offload *f) +{ + struct flow_block_cb *block_cb; + + block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower, + port); + if (!block_cb) + return; + + if (!flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } +} diff --git a/drivers/net/ethernet/mscc/ocelot_police.c b/drivers/net/ethernet/mscc/ocelot_police.c new file mode 100644 index 000000000000..701e82dd749a --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_police.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#include "ocelot_police.h" + +enum mscc_qos_rate_mode { + MSCC_QOS_RATE_MODE_DISABLED, /* Policer/shaper disabled */ + MSCC_QOS_RATE_MODE_LINE, /* Measure line rate in kbps incl. IPG */ + MSCC_QOS_RATE_MODE_DATA, /* Measures data rate in kbps excl. IPG */ + MSCC_QOS_RATE_MODE_FRAME, /* Measures frame rate in fps */ + __MSCC_QOS_RATE_MODE_END, + NUM_MSCC_QOS_RATE_MODE = __MSCC_QOS_RATE_MODE_END, + MSCC_QOS_RATE_MODE_MAX = __MSCC_QOS_RATE_MODE_END - 1, +}; + +/* Types for ANA:POL[0-192]:POL_MODE_CFG.FRM_MODE */ +#define POL_MODE_LINERATE 0 /* Incl IPG. Unit: 33 1/3 kbps, 4096 bytes */ +#define POL_MODE_DATARATE 1 /* Excl IPG. 
Unit: 33 1/3 kbps, 4096 bytes */ +#define POL_MODE_FRMRATE_HI 2 /* Unit: 33 1/3 fps, 32.8 frames */ +#define POL_MODE_FRMRATE_LO 3 /* Unit: 1/3 fps, 0.3 frames */ + +/* Policer indexes */ +#define POL_IX_PORT 0 /* 0-11 : Port policers */ +#define POL_IX_QUEUE 32 /* 32-127 : Queue policers */ + +/* Default policer order */ +#define POL_ORDER 0x1d3 /* Ocelot policer order: Serial (QoS -> Port -> VCAP) */ + +struct qos_policer_conf { + enum mscc_qos_rate_mode mode; + bool dlb; /* Enable DLB (dual leaky bucket) mode */ + bool cf; /* Coupling flag (ignored in SLB mode) */ + u32 cir; /* CIR in kbps/fps (ignored in SLB mode) */ + u32 cbs; /* CBS in bytes/frames (ignored in SLB mode) */ + u32 pir; /* PIR in kbps/fps */ + u32 pbs; /* PBS in bytes/frames */ + u8 ipg; /* Size of IPG when MSCC_QOS_RATE_MODE_LINE is chosen */ +}; + +static int qos_policer_conf_set(struct ocelot_port *port, u32 pol_ix, + struct qos_policer_conf *conf) +{ + u32 cf = 0, cir_ena = 0, frm_mode = POL_MODE_LINERATE; + u32 cir = 0, cbs = 0, pir = 0, pbs = 0; + bool cir_discard = 0, pir_discard = 0; + struct ocelot *ocelot = port->ocelot; + u32 pbs_max = 0, cbs_max = 0; + u8 ipg = 20; + u32 value; + + pir = conf->pir; + pbs = conf->pbs; + + switch (conf->mode) { + case MSCC_QOS_RATE_MODE_LINE: + case MSCC_QOS_RATE_MODE_DATA: + if (conf->mode == MSCC_QOS_RATE_MODE_LINE) { + frm_mode = POL_MODE_LINERATE; + ipg = min_t(u8, GENMASK(4, 0), conf->ipg); + } else { + frm_mode = POL_MODE_DATARATE; + } + if (conf->dlb) { + cir_ena = 1; + cir = conf->cir; + cbs = conf->cbs; + if (cir == 0 && cbs == 0) { + /* Discard CIR frames */ + cir_discard = 1; + } else { + cir = DIV_ROUND_UP(cir, 100); + cir *= 3; /* 33 1/3 kbps */ + cbs = DIV_ROUND_UP(cbs, 4096); + cbs = (cbs ? cbs : 1); /* No zero burst size */ + cbs_max = 60; /* Limit burst size */ + cf = conf->cf; + if (cf) + pir += conf->cir; + } + } + if (pir == 0 && pbs == 0) { + /* Discard PIR frames */ + pir_discard = 1; + } else { + pir = DIV_ROUND_UP(pir, 100); + pir *= 3; /* 33 1/3 kbps */ + pbs = DIV_ROUND_UP(pbs, 4096); + pbs = (pbs ? pbs : 1); /* No zero burst size */ + pbs_max = 60; /* Limit burst size */ + } + break; + case MSCC_QOS_RATE_MODE_FRAME: + if (pir >= 100) { + frm_mode = POL_MODE_FRMRATE_HI; + pir = DIV_ROUND_UP(pir, 100); + pir *= 3; /* 33 1/3 fps */ + pbs = (pbs * 10) / 328; /* 32.8 frames */ + pbs = (pbs ? pbs : 1); /* No zero burst size */ + pbs_max = GENMASK(6, 0); /* Limit burst size */ + } else { + frm_mode = POL_MODE_FRMRATE_LO; + if (pir == 0 && pbs == 0) { + /* Discard all frames */ + pir_discard = 1; + cir_discard = 1; + } else { + pir *= 3; /* 1/3 fps */ + pbs = (pbs * 10) / 3; /* 0.3 frames */ + pbs = (pbs ? pbs : 1); /* No zero burst size */ + pbs_max = 61; /* Limit burst size */ + } + } + break; + default: /* MSCC_QOS_RATE_MODE_DISABLED */ + /* Disable policer using maximum rate and zero burst */ + pir = GENMASK(15, 0); + pbs = 0; + break; + } + + /* Check limits */ + if (pir > GENMASK(15, 0)) { + netdev_err(port->dev, "Invalid pir\n"); + return -EINVAL; + } + + if (cir > GENMASK(15, 0)) { + netdev_err(port->dev, "Invalid cir\n"); + return -EINVAL; + } + + if (pbs > pbs_max) { + netdev_err(port->dev, "Invalid pbs\n"); + return -EINVAL; + } + + if (cbs > cbs_max) { + netdev_err(port->dev, "Invalid cbs\n"); + return -EINVAL; + } + + value = (ANA_POL_MODE_CFG_IPG_SIZE(ipg) | + ANA_POL_MODE_CFG_FRM_MODE(frm_mode) | + (cf ? ANA_POL_MODE_CFG_DLB_COUPLED : 0) | + (cir_ena ?
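+ /* A worked example of the encoding above: pir = 100000 kbps becomes + * DIV_ROUND_UP(100000, 100) * 3 = 3000 units of 33 1/3 kbps, and a + * pbs of 12288 bytes becomes DIV_ROUND_UP(12288, 4096) = 3 units of + * 4096 bytes. + */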
ANA_POL_MODE_CFG_CIR_ENA : 0) | + ANA_POL_MODE_CFG_OVERSHOOT_ENA); + + ocelot_write_gix(ocelot, value, ANA_POL_MODE_CFG, pol_ix); + + ocelot_write_gix(ocelot, + ANA_POL_PIR_CFG_PIR_RATE(pir) | + ANA_POL_PIR_CFG_PIR_BURST(pbs), + ANA_POL_PIR_CFG, pol_ix); + + ocelot_write_gix(ocelot, + (pir_discard ? GENMASK(22, 0) : 0), + ANA_POL_PIR_STATE, pol_ix); + + ocelot_write_gix(ocelot, + ANA_POL_CIR_CFG_CIR_RATE(cir) | + ANA_POL_CIR_CFG_CIR_BURST(cbs), + ANA_POL_CIR_CFG, pol_ix); + + ocelot_write_gix(ocelot, + (cir_discard ? GENMASK(22, 0) : 0), + ANA_POL_CIR_STATE, pol_ix); + + return 0; +} + +int ocelot_port_policer_add(struct ocelot_port *port, + struct ocelot_policer *pol) +{ + struct ocelot *ocelot = port->ocelot; + struct qos_policer_conf pp = { 0 }; + int err; + + if (!pol) + return -EINVAL; + + pp.mode = MSCC_QOS_RATE_MODE_DATA; + pp.pir = pol->rate; + pp.pbs = pol->burst; + + netdev_dbg(port->dev, + "%s: port %u pir %u kbps, pbs %u bytes\n", + __func__, port->chip_port, pp.pir, pp.pbs); + + err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp); + if (err) + return err; + + ocelot_rmw_gix(ocelot, + ANA_PORT_POL_CFG_PORT_POL_ENA | + ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER), + ANA_PORT_POL_CFG_PORT_POL_ENA | + ANA_PORT_POL_CFG_POL_ORDER_M, + ANA_PORT_POL_CFG, port->chip_port); + + return 0; +} + +int ocelot_port_policer_del(struct ocelot_port *port) +{ + struct ocelot *ocelot = port->ocelot; + struct qos_policer_conf pp = { 0 }; + int err; + + netdev_dbg(port->dev, "%s: port %u\n", __func__, port->chip_port); + + pp.mode = MSCC_QOS_RATE_MODE_DISABLED; + + err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp); + if (err) + return err; + + ocelot_rmw_gix(ocelot, + ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER), + ANA_PORT_POL_CFG_PORT_POL_ENA | + ANA_PORT_POL_CFG_POL_ORDER_M, + ANA_PORT_POL_CFG, port->chip_port); + + return 0; +} diff --git a/drivers/net/ethernet/mscc/ocelot_police.h b/drivers/net/ethernet/mscc/ocelot_police.h new file mode 100644 index 000000000000..d1137f79efda --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_police.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _MSCC_OCELOT_POLICE_H_ +#define _MSCC_OCELOT_POLICE_H_ + +#include "ocelot.h" + +struct ocelot_policer { + u32 rate; /* kilobit per second */ + u32 burst; /* bytes */ +}; + +int ocelot_port_policer_add(struct ocelot_port *port, + struct ocelot_policer *pol); + +int ocelot_port_policer_del(struct ocelot_port *port); + +#endif /* _MSCC_OCELOT_POLICE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c index 9271af18b93b..6c387f994ec5 100644 --- a/drivers/net/ethernet/mscc/ocelot_regs.c +++ b/drivers/net/ethernet/mscc/ocelot_regs.c @@ -224,12 +224,23 @@ static const u32 ocelot_sys_regmap[] = { REG(SYS_PTP_CFG, 0x0006c4), }; +static const u32 ocelot_s2_regmap[] = { + REG(S2_CORE_UPDATE_CTRL, 0x000000), + REG(S2_CORE_MV_CFG, 0x000004), + REG(S2_CACHE_ENTRY_DAT, 0x000008), + REG(S2_CACHE_MASK_DAT, 0x000108), + REG(S2_CACHE_ACTION_DAT, 0x000208), + REG(S2_CACHE_CNT_DAT, 0x000308), + REG(S2_CACHE_TG_DAT, 0x000388), +}; + static const u32 *ocelot_regmap[] = { [ANA] = ocelot_ana_regmap, [QS] = ocelot_qs_regmap, [QSYS] = ocelot_qsys_regmap, [REW] = ocelot_rew_regmap, [SYS] = ocelot_sys_regmap, + [S2] = ocelot_s2_regmap, }; static const struct reg_field ocelot_regfields[] = { diff --git a/drivers/net/ethernet/mscc/ocelot_s2.h 
b/drivers/net/ethernet/mscc/ocelot_s2.h new file mode 100644 index 000000000000..80107bec2e45 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_s2.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * Copyright (c) 2018 Microsemi Corporation + */ + +#ifndef _OCELOT_S2_CORE_H_ +#define _OCELOT_S2_CORE_H_ + +#define S2_CORE_UPDATE_CTRL_UPDATE_CMD(x) (((x) << 22) & GENMASK(24, 22)) +#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_M GENMASK(24, 22) +#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_X(x) (((x) & GENMASK(24, 22)) >> 22) +#define S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS BIT(21) +#define S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS BIT(20) +#define S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS BIT(19) +#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR(x) (((x) << 3) & GENMASK(18, 3)) +#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_M GENMASK(18, 3) +#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_X(x) (((x) & GENMASK(18, 3)) >> 3) +#define S2_CORE_UPDATE_CTRL_UPDATE_SHOT BIT(2) +#define S2_CORE_UPDATE_CTRL_CLEAR_CACHE BIT(1) +#define S2_CORE_UPDATE_CTRL_MV_TRAFFIC_IGN BIT(0) + +#define S2_CORE_MV_CFG_MV_NUM_POS(x) (((x) << 16) & GENMASK(31, 16)) +#define S2_CORE_MV_CFG_MV_NUM_POS_M GENMASK(31, 16) +#define S2_CORE_MV_CFG_MV_NUM_POS_X(x) (((x) & GENMASK(31, 16)) >> 16) +#define S2_CORE_MV_CFG_MV_SIZE(x) ((x) & GENMASK(15, 0)) +#define S2_CORE_MV_CFG_MV_SIZE_M GENMASK(15, 0) + +#define S2_CACHE_ENTRY_DAT_RSZ 0x4 + +#define S2_CACHE_MASK_DAT_RSZ 0x4 + +#define S2_CACHE_ACTION_DAT_RSZ 0x4 + +#define S2_CACHE_CNT_DAT_RSZ 0x4 + +#define S2_STICKY_VCAP_ROW_DELETED_STICKY BIT(0) + +#define S2_BIST_CTRL_TCAM_BIST BIT(1) +#define S2_BIST_CTRL_TCAM_INIT BIT(0) + +#define S2_BIST_CFG_TCAM_BIST_SOE_ENA BIT(8) +#define S2_BIST_CFG_TCAM_HCG_DIS BIT(7) +#define S2_BIST_CFG_TCAM_CG_DIS BIT(6) +#define S2_BIST_CFG_TCAM_BIAS(x) ((x) & GENMASK(5, 0)) +#define S2_BIST_CFG_TCAM_BIAS_M GENMASK(5, 0) + +#define S2_BIST_STAT_BIST_RT_ERR BIT(15) +#define S2_BIST_STAT_BIST_PENC_ERR BIT(14) +#define S2_BIST_STAT_BIST_COMP_ERR BIT(13) +#define S2_BIST_STAT_BIST_ADDR_ERR BIT(12) +#define S2_BIST_STAT_BIST_BL1E_ERR BIT(11) +#define S2_BIST_STAT_BIST_BL1_ERR BIT(10) +#define S2_BIST_STAT_BIST_BL0E_ERR BIT(9) +#define S2_BIST_STAT_BIST_BL0_ERR BIT(8) +#define S2_BIST_STAT_BIST_PH1_ERR BIT(7) +#define S2_BIST_STAT_BIST_PH0_ERR BIT(6) +#define S2_BIST_STAT_BIST_PV1_ERR BIT(5) +#define S2_BIST_STAT_BIST_PV0_ERR BIT(4) +#define S2_BIST_STAT_BIST_RUN BIT(3) +#define S2_BIST_STAT_BIST_ERR BIT(2) +#define S2_BIST_STAT_BIST_BUSY BIT(1) +#define S2_BIST_STAT_TCAM_RDY BIT(0) + +#endif /* _OCELOT_S2_CORE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c new file mode 100644 index 000000000000..9e6464ffae5d --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_tc.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch TC driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#include "ocelot_tc.h" +#include "ocelot_police.h" +#include "ocelot_ace.h" +#include <net/pkt_cls.h> + +static int ocelot_setup_tc_cls_matchall(struct ocelot_port *port, + struct tc_cls_matchall_offload *f, + bool ingress) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct ocelot_policer pol = { 0 }; + struct flow_action_entry *action; + int err; + + netdev_dbg(port->dev, "%s: port %u command %d cookie %lu\n", + __func__, port->chip_port, f->command, f->cookie); + + if (!ingress) { + NL_SET_ERR_MSG_MOD(extack, "Only ingress is supported"); + return 
-EOPNOTSUPP; + } + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + if (!flow_offload_has_one_action(&f->rule->action)) { + NL_SET_ERR_MSG_MOD(extack, + "Only one action is supported"); + return -EOPNOTSUPP; + } + + if (port->tc.block_shared) { + NL_SET_ERR_MSG_MOD(extack, + "Rate limit is not supported on shared blocks"); + return -EOPNOTSUPP; + } + + action = &f->rule->action.entries[0]; + + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); + return -EOPNOTSUPP; + } + + if (port->tc.police_id && port->tc.police_id != f->cookie) { + NL_SET_ERR_MSG_MOD(extack, + "Only one policer per port is supported"); + return -EEXIST; + } + + pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8; + pol.burst = (u32)div_u64(action->police.rate_bytes_ps * + PSCHED_NS2TICKS(action->police.burst), + PSCHED_TICKS_PER_SEC); + + err = ocelot_port_policer_add(port, &pol); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Could not add policer"); + return err; + } + + port->tc.police_id = f->cookie; + port->tc.offload_cnt++; + return 0; + case TC_CLSMATCHALL_DESTROY: + if (port->tc.police_id != f->cookie) + return -ENOENT; + + err = ocelot_port_policer_del(port); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Could not delete policer"); + return err; + } + port->tc.police_id = 0; + port->tc.offload_cnt--; + return 0; + case TC_CLSMATCHALL_STATS: /* fall through */ + default: + return -EOPNOTSUPP; + } +} + +static int ocelot_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, + void *cb_priv, bool ingress) +{ + struct ocelot_port *port = cb_priv; + + if (!tc_cls_can_offload_and_chain0(port->dev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + netdev_dbg(port->dev, "tc_block_cb: TC_SETUP_CLSMATCHALL %s\n", + ingress ? "ingress" : "egress"); + + return ocelot_setup_tc_cls_matchall(port, type_data, ingress); + case TC_SETUP_CLSFLOWER: + return 0; + default: + netdev_dbg(port->dev, "tc_block_cb: type %d %s\n", + type, + ingress ?
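+ /* TC_SETUP_CLSFLOWER returned 0 above because flower rules are + * delivered through the separate block callback registered by + * ocelot_setup_tc_block_flower_bind(). + */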
"ingress" : "egress"); + + return -EOPNOTSUPP; + } +} + +static int ocelot_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, + void *cb_priv) +{ + return ocelot_setup_tc_block_cb(type, type_data, + cb_priv, true); +} + +static int ocelot_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, + void *cb_priv) +{ + return ocelot_setup_tc_block_cb(type, type_data, + cb_priv, false); +} + +static LIST_HEAD(ocelot_block_cb_list); + +static int ocelot_setup_tc_block(struct ocelot_port *port, + struct flow_block_offload *f) +{ + struct flow_block_cb *block_cb; + tc_setup_cb_t *cb; + int err; + + netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n", + f->command, f->binder_type); + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) { + cb = ocelot_setup_tc_block_cb_ig; + port->tc.block_shared = f->block_shared; + } else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) { + cb = ocelot_setup_tc_block_cb_eg; + } else { + return -EOPNOTSUPP; + } + + f->driver_block_list = &ocelot_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(cb, port, &ocelot_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(f->net, cb, port, port, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + err = ocelot_setup_tc_block_flower_bind(port, f); + if (err < 0) { + flow_block_cb_free(block_cb); + return err; + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, f->driver_block_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f, cb, port); + if (!block_cb) + return -ENOENT; + + ocelot_setup_tc_block_flower_unbind(port, f); + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } +} + +int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct ocelot_port *port = netdev_priv(dev); + + switch (type) { + case TC_SETUP_BLOCK: + return ocelot_setup_tc_block(port, type_data); + default: + return -EOPNOTSUPP; + } + return 0; +} diff --git a/drivers/net/ethernet/mscc/ocelot_tc.h b/drivers/net/ethernet/mscc/ocelot_tc.h new file mode 100644 index 000000000000..61757c2250a6 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_tc.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _MSCC_OCELOT_TC_H_ +#define _MSCC_OCELOT_TC_H_ + +#include <linux/netdevice.h> + +struct ocelot_port_tc { + bool block_shared; + unsigned long offload_cnt; + + unsigned long police_id; +}; + +int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + +#endif /* _MSCC_OCELOT_TC_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.h b/drivers/net/ethernet/mscc/ocelot_vcap.h new file mode 100644 index 000000000000..e22eac1da783 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_vcap.h @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) + * Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _OCELOT_VCAP_H_ +#define _OCELOT_VCAP_H_ + +/* ================================================================= + * VCAP Common + * ================================================================= + */ + +/* VCAP Type-Group values */ +#define VCAP_TG_NONE 0 /* Entry is invalid */ +#define VCAP_TG_FULL 1 /* Full entry */ +#define VCAP_TG_HALF 2 /* Half entry */ +#define VCAP_TG_QUARTER 3 /* 
Quarter entry */ + +/* ================================================================= + * VCAP IS2 + * ================================================================= + */ + +#define VCAP_IS2_CNT 64 +#define VCAP_IS2_ENTRY_WIDTH 376 +#define VCAP_IS2_ACTION_WIDTH 99 +#define VCAP_PORT_CNT 11 + +/* IS2 half key types */ +#define IS2_TYPE_ETYPE 0 +#define IS2_TYPE_LLC 1 +#define IS2_TYPE_SNAP 2 +#define IS2_TYPE_ARP 3 +#define IS2_TYPE_IP_UDP_TCP 4 +#define IS2_TYPE_IP_OTHER 5 +#define IS2_TYPE_IPV6 6 +#define IS2_TYPE_OAM 7 +#define IS2_TYPE_SMAC_SIP6 8 +#define IS2_TYPE_ANY 100 /* Pseudo type */ + +/* IS2 half key type mask for matching any IP */ +#define IS2_TYPE_MASK_IP_ANY 0xe + +/* IS2 action types */ +#define IS2_ACTION_TYPE_NORMAL 0 +#define IS2_ACTION_TYPE_SMAC_SIP 1 + +/* IS2 MASK_MODE values */ +#define IS2_ACT_MASK_MODE_NONE 0 +#define IS2_ACT_MASK_MODE_FILTER 1 +#define IS2_ACT_MASK_MODE_POLICY 2 +#define IS2_ACT_MASK_MODE_REDIR 3 + +/* IS2 REW_OP values */ +#define IS2_ACT_REW_OP_NONE 0 +#define IS2_ACT_REW_OP_PTP_ONE 2 +#define IS2_ACT_REW_OP_PTP_TWO 3 +#define IS2_ACT_REW_OP_SPECIAL 8 +#define IS2_ACT_REW_OP_PTP_ORG 9 +#define IS2_ACT_REW_OP_PTP_ONE_SUB_DELAY_1 (IS2_ACT_REW_OP_PTP_ONE | (1 << 3)) +#define IS2_ACT_REW_OP_PTP_ONE_SUB_DELAY_2 (IS2_ACT_REW_OP_PTP_ONE | (2 << 3)) +#define IS2_ACT_REW_OP_PTP_ONE_ADD_DELAY (IS2_ACT_REW_OP_PTP_ONE | (1 << 5)) +#define IS2_ACT_REW_OP_PTP_ONE_ADD_SUB BIT(7) + +#define VCAP_PORT_WIDTH 4 + +/* IS2 quarter key - SMAC_SIP4 */ +#define IS2_QKO_IGR_PORT 0 +#define IS2_QKL_IGR_PORT VCAP_PORT_WIDTH +#define IS2_QKO_L2_SMAC (IS2_QKO_IGR_PORT + IS2_QKL_IGR_PORT) +#define IS2_QKL_L2_SMAC 48 +#define IS2_QKO_L3_IP4_SIP (IS2_QKO_L2_SMAC + IS2_QKL_L2_SMAC) +#define IS2_QKL_L3_IP4_SIP 32 + +/* IS2 half key - common */ +#define IS2_HKO_TYPE 0 +#define IS2_HKL_TYPE 4 +#define IS2_HKO_FIRST (IS2_HKO_TYPE + IS2_HKL_TYPE) +#define IS2_HKL_FIRST 1 +#define IS2_HKO_PAG (IS2_HKO_FIRST + IS2_HKL_FIRST) +#define IS2_HKL_PAG 8 +#define IS2_HKO_IGR_PORT_MASK (IS2_HKO_PAG + IS2_HKL_PAG) +#define IS2_HKL_IGR_PORT_MASK (VCAP_PORT_CNT + 1) +#define IS2_HKO_SERVICE_FRM (IS2_HKO_IGR_PORT_MASK + IS2_HKL_IGR_PORT_MASK) +#define IS2_HKL_SERVICE_FRM 1 +#define IS2_HKO_HOST_MATCH (IS2_HKO_SERVICE_FRM + IS2_HKL_SERVICE_FRM) +#define IS2_HKL_HOST_MATCH 1 +#define IS2_HKO_L2_MC (IS2_HKO_HOST_MATCH + IS2_HKL_HOST_MATCH) +#define IS2_HKL_L2_MC 1 +#define IS2_HKO_L2_BC (IS2_HKO_L2_MC + IS2_HKL_L2_MC) +#define IS2_HKL_L2_BC 1 +#define IS2_HKO_VLAN_TAGGED (IS2_HKO_L2_BC + IS2_HKL_L2_BC) +#define IS2_HKL_VLAN_TAGGED 1 +#define IS2_HKO_VID (IS2_HKO_VLAN_TAGGED + IS2_HKL_VLAN_TAGGED) +#define IS2_HKL_VID 12 +#define IS2_HKO_DEI (IS2_HKO_VID + IS2_HKL_VID) +#define IS2_HKL_DEI 1 +#define IS2_HKO_PCP (IS2_HKO_DEI + IS2_HKL_DEI) +#define IS2_HKL_PCP 3 + +/* IS2 half key - MAC_ETYPE/MAC_LLC/MAC_SNAP/OAM common */ +#define IS2_HKO_L2_DMAC (IS2_HKO_PCP + IS2_HKL_PCP) +#define IS2_HKL_L2_DMAC 48 +#define IS2_HKO_L2_SMAC (IS2_HKO_L2_DMAC + IS2_HKL_L2_DMAC) +#define IS2_HKL_L2_SMAC 48 + +/* IS2 half key - MAC_ETYPE */ +#define IS2_HKO_MAC_ETYPE_ETYPE (IS2_HKO_L2_SMAC + IS2_HKL_L2_SMAC) +#define IS2_HKL_MAC_ETYPE_ETYPE 16 +#define IS2_HKO_MAC_ETYPE_L2_PAYLOAD \ + (IS2_HKO_MAC_ETYPE_ETYPE + IS2_HKL_MAC_ETYPE_ETYPE) +#define IS2_HKL_MAC_ETYPE_L2_PAYLOAD 27 + +/* IS2 half key - MAC_LLC */ +#define IS2_HKO_MAC_LLC_L2_LLC IS2_HKO_MAC_ETYPE_ETYPE +#define IS2_HKL_MAC_LLC_L2_LLC 40 + +/* IS2 half key - MAC_SNAP */ +#define IS2_HKO_MAC_SNAP_L2_SNAP IS2_HKO_MAC_ETYPE_ETYPE +#define 
IS2_HKL_MAC_SNAP_L2_SNAP 40 + +/* IS2 half key - ARP */ +#define IS2_HKO_MAC_ARP_L2_SMAC IS2_HKO_L2_DMAC +#define IS2_HKL_MAC_ARP_L2_SMAC 48 +#define IS2_HKO_MAC_ARP_ARP_ADDR_SPACE_OK \ + (IS2_HKO_MAC_ARP_L2_SMAC + IS2_HKL_MAC_ARP_L2_SMAC) +#define IS2_HKL_MAC_ARP_ARP_ADDR_SPACE_OK 1 +#define IS2_HKO_MAC_ARP_ARP_PROTO_SPACE_OK \ + (IS2_HKO_MAC_ARP_ARP_ADDR_SPACE_OK + IS2_HKL_MAC_ARP_ARP_ADDR_SPACE_OK) +#define IS2_HKL_MAC_ARP_ARP_PROTO_SPACE_OK 1 +#define IS2_HKO_MAC_ARP_ARP_LEN_OK \ + (IS2_HKO_MAC_ARP_ARP_PROTO_SPACE_OK + \ + IS2_HKL_MAC_ARP_ARP_PROTO_SPACE_OK) +#define IS2_HKL_MAC_ARP_ARP_LEN_OK 1 +#define IS2_HKO_MAC_ARP_ARP_TGT_MATCH \ + (IS2_HKO_MAC_ARP_ARP_LEN_OK + IS2_HKL_MAC_ARP_ARP_LEN_OK) +#define IS2_HKL_MAC_ARP_ARP_TGT_MATCH 1 +#define IS2_HKO_MAC_ARP_ARP_SENDER_MATCH \ + (IS2_HKO_MAC_ARP_ARP_TGT_MATCH + IS2_HKL_MAC_ARP_ARP_TGT_MATCH) +#define IS2_HKL_MAC_ARP_ARP_SENDER_MATCH 1 +#define IS2_HKO_MAC_ARP_ARP_OPCODE_UNKNOWN \ + (IS2_HKO_MAC_ARP_ARP_SENDER_MATCH + IS2_HKL_MAC_ARP_ARP_SENDER_MATCH) +#define IS2_HKL_MAC_ARP_ARP_OPCODE_UNKNOWN 1 +#define IS2_HKO_MAC_ARP_ARP_OPCODE \ + (IS2_HKO_MAC_ARP_ARP_OPCODE_UNKNOWN + \ + IS2_HKL_MAC_ARP_ARP_OPCODE_UNKNOWN) +#define IS2_HKL_MAC_ARP_ARP_OPCODE 2 +#define IS2_HKO_MAC_ARP_L3_IP4_DIP \ + (IS2_HKO_MAC_ARP_ARP_OPCODE + IS2_HKL_MAC_ARP_ARP_OPCODE) +#define IS2_HKL_MAC_ARP_L3_IP4_DIP 32 +#define IS2_HKO_MAC_ARP_L3_IP4_SIP \ + (IS2_HKO_MAC_ARP_L3_IP4_DIP + IS2_HKL_MAC_ARP_L3_IP4_DIP) +#define IS2_HKL_MAC_ARP_L3_IP4_SIP 32 +#define IS2_HKO_MAC_ARP_DIP_EQ_SIP \ + (IS2_HKO_MAC_ARP_L3_IP4_SIP + IS2_HKL_MAC_ARP_L3_IP4_SIP) +#define IS2_HKL_MAC_ARP_DIP_EQ_SIP 1 + +/* IS2 half key - IP4_TCP_UDP/IP4_OTHER common */ +#define IS2_HKO_IP4 IS2_HKO_L2_DMAC +#define IS2_HKL_IP4 1 +#define IS2_HKO_L3_FRAGMENT (IS2_HKO_IP4 + IS2_HKL_IP4) +#define IS2_HKL_L3_FRAGMENT 1 +#define IS2_HKO_L3_FRAG_OFS_GT0 (IS2_HKO_L3_FRAGMENT + IS2_HKL_L3_FRAGMENT) +#define IS2_HKL_L3_FRAG_OFS_GT0 1 +#define IS2_HKO_L3_OPTIONS (IS2_HKO_L3_FRAG_OFS_GT0 + IS2_HKL_L3_FRAG_OFS_GT0) +#define IS2_HKL_L3_OPTIONS 1 +#define IS2_HKO_L3_TTL_GT0 (IS2_HKO_L3_OPTIONS + IS2_HKL_L3_OPTIONS) +#define IS2_HKL_L3_TTL_GT0 1 +#define IS2_HKO_L3_TOS (IS2_HKO_L3_TTL_GT0 + IS2_HKL_L3_TTL_GT0) +#define IS2_HKL_L3_TOS 8 +#define IS2_HKO_L3_IP4_DIP (IS2_HKO_L3_TOS + IS2_HKL_L3_TOS) +#define IS2_HKL_L3_IP4_DIP 32 +#define IS2_HKO_L3_IP4_SIP (IS2_HKO_L3_IP4_DIP + IS2_HKL_L3_IP4_DIP) +#define IS2_HKL_L3_IP4_SIP 32 +#define IS2_HKO_DIP_EQ_SIP (IS2_HKO_L3_IP4_SIP + IS2_HKL_L3_IP4_SIP) +#define IS2_HKL_DIP_EQ_SIP 1 + +/* IS2 half key - IP4_TCP_UDP */ +#define IS2_HKO_IP4_TCP_UDP_TCP (IS2_HKO_DIP_EQ_SIP + IS2_HKL_DIP_EQ_SIP) +#define IS2_HKL_IP4_TCP_UDP_TCP 1 +#define IS2_HKO_IP4_TCP_UDP_L4_DPORT \ + (IS2_HKO_IP4_TCP_UDP_TCP + IS2_HKL_IP4_TCP_UDP_TCP) +#define IS2_HKL_IP4_TCP_UDP_L4_DPORT 16 +#define IS2_HKO_IP4_TCP_UDP_L4_SPORT \ + (IS2_HKO_IP4_TCP_UDP_L4_DPORT + IS2_HKL_IP4_TCP_UDP_L4_DPORT) +#define IS2_HKL_IP4_TCP_UDP_L4_SPORT 16 +#define IS2_HKO_IP4_TCP_UDP_L4_RNG \ + (IS2_HKO_IP4_TCP_UDP_L4_SPORT + IS2_HKL_IP4_TCP_UDP_L4_SPORT) +#define IS2_HKL_IP4_TCP_UDP_L4_RNG 8 +#define IS2_HKO_IP4_TCP_UDP_SPORT_EQ_DPORT \ + (IS2_HKO_IP4_TCP_UDP_L4_RNG + IS2_HKL_IP4_TCP_UDP_L4_RNG) +#define IS2_HKL_IP4_TCP_UDP_SPORT_EQ_DPORT 1 +#define IS2_HKO_IP4_TCP_UDP_SEQUENCE_EQ0 \ + (IS2_HKO_IP4_TCP_UDP_SPORT_EQ_DPORT + \ + IS2_HKL_IP4_TCP_UDP_SPORT_EQ_DPORT) +#define IS2_HKL_IP4_TCP_UDP_SEQUENCE_EQ0 1 +#define IS2_HKO_IP4_TCP_UDP_L4_FIN \ + (IS2_HKO_IP4_TCP_UDP_SEQUENCE_EQ0 + IS2_HKL_IP4_TCP_UDP_SEQUENCE_EQ0) +#define 
IS2_HKL_IP4_TCP_UDP_L4_FIN 1 +#define IS2_HKO_IP4_TCP_UDP_L4_SYN \ + (IS2_HKO_IP4_TCP_UDP_L4_FIN + IS2_HKL_IP4_TCP_UDP_L4_FIN) +#define IS2_HKL_IP4_TCP_UDP_L4_SYN 1 +#define IS2_HKO_IP4_TCP_UDP_L4_RST \ + (IS2_HKO_IP4_TCP_UDP_L4_SYN + IS2_HKL_IP4_TCP_UDP_L4_SYN) +#define IS2_HKL_IP4_TCP_UDP_L4_RST 1 +#define IS2_HKO_IP4_TCP_UDP_L4_PSH \ + (IS2_HKO_IP4_TCP_UDP_L4_RST + IS2_HKL_IP4_TCP_UDP_L4_RST) +#define IS2_HKL_IP4_TCP_UDP_L4_PSH 1 +#define IS2_HKO_IP4_TCP_UDP_L4_ACK \ + (IS2_HKO_IP4_TCP_UDP_L4_PSH + IS2_HKL_IP4_TCP_UDP_L4_PSH) +#define IS2_HKL_IP4_TCP_UDP_L4_ACK 1 +#define IS2_HKO_IP4_TCP_UDP_L4_URG \ + (IS2_HKO_IP4_TCP_UDP_L4_ACK + IS2_HKL_IP4_TCP_UDP_L4_ACK) +#define IS2_HKL_IP4_TCP_UDP_L4_URG 1 +#define IS2_HKO_IP4_TCP_UDP_L4_1588_DOM \ + (IS2_HKO_IP4_TCP_UDP_L4_URG + IS2_HKL_IP4_TCP_UDP_L4_URG) +#define IS2_HKL_IP4_TCP_UDP_L4_1588_DOM 8 +#define IS2_HKO_IP4_TCP_UDP_L4_1588_VER \ + (IS2_HKO_IP4_TCP_UDP_L4_1588_DOM + IS2_HKL_IP4_TCP_UDP_L4_1588_DOM) +#define IS2_HKL_IP4_TCP_UDP_L4_1588_VER 4 + +/* IS2 half key - IP4_OTHER */ +#define IS2_HKO_IP4_OTHER_L3_PROTO IS2_HKO_IP4_TCP_UDP_TCP +#define IS2_HKL_IP4_OTHER_L3_PROTO 8 +#define IS2_HKO_IP4_OTHER_L3_PAYLOAD \ + (IS2_HKO_IP4_OTHER_L3_PROTO + IS2_HKL_IP4_OTHER_L3_PROTO) +#define IS2_HKL_IP4_OTHER_L3_PAYLOAD 56 + +/* IS2 half key - IP6_STD */ +#define IS2_HKO_IP6_STD_L3_TTL_GT0 IS2_HKO_L2_DMAC +#define IS2_HKL_IP6_STD_L3_TTL_GT0 1 +#define IS2_HKO_IP6_STD_L3_IP6_SIP \ + (IS2_HKO_IP6_STD_L3_TTL_GT0 + IS2_HKL_IP6_STD_L3_TTL_GT0) +#define IS2_HKL_IP6_STD_L3_IP6_SIP 128 +#define IS2_HKO_IP6_STD_L3_PROTO \ + (IS2_HKO_IP6_STD_L3_IP6_SIP + IS2_HKL_IP6_STD_L3_IP6_SIP) +#define IS2_HKL_IP6_STD_L3_PROTO 8 + +/* IS2 half key - OAM */ +#define IS2_HKO_OAM_OAM_MEL_FLAGS IS2_HKO_MAC_ETYPE_ETYPE +#define IS2_HKL_OAM_OAM_MEL_FLAGS 7 +#define IS2_HKO_OAM_OAM_VER \ + (IS2_HKO_OAM_OAM_MEL_FLAGS + IS2_HKL_OAM_OAM_MEL_FLAGS) +#define IS2_HKL_OAM_OAM_VER 5 +#define IS2_HKO_OAM_OAM_OPCODE (IS2_HKO_OAM_OAM_VER + IS2_HKL_OAM_OAM_VER) +#define IS2_HKL_OAM_OAM_OPCODE 8 +#define IS2_HKO_OAM_OAM_FLAGS (IS2_HKO_OAM_OAM_OPCODE + IS2_HKL_OAM_OAM_OPCODE) +#define IS2_HKL_OAM_OAM_FLAGS 8 +#define IS2_HKO_OAM_OAM_MEPID (IS2_HKO_OAM_OAM_FLAGS + IS2_HKL_OAM_OAM_FLAGS) +#define IS2_HKL_OAM_OAM_MEPID 16 +#define IS2_HKO_OAM_OAM_CCM_CNTS_EQ0 \ + (IS2_HKO_OAM_OAM_MEPID + IS2_HKL_OAM_OAM_MEPID) +#define IS2_HKL_OAM_OAM_CCM_CNTS_EQ0 1 + +/* IS2 half key - SMAC_SIP6 */ +#define IS2_HKO_SMAC_SIP6_IGR_PORT IS2_HKL_TYPE +#define IS2_HKL_SMAC_SIP6_IGR_PORT VCAP_PORT_WIDTH +#define IS2_HKO_SMAC_SIP6_L2_SMAC \ + (IS2_HKO_SMAC_SIP6_IGR_PORT + IS2_HKL_SMAC_SIP6_IGR_PORT) +#define IS2_HKL_SMAC_SIP6_L2_SMAC 48 +#define IS2_HKO_SMAC_SIP6_L3_IP6_SIP \ + (IS2_HKO_SMAC_SIP6_L2_SMAC + IS2_HKL_SMAC_SIP6_L2_SMAC) +#define IS2_HKL_SMAC_SIP6_L3_IP6_SIP 128 + +/* IS2 full key - common */ +#define IS2_FKO_TYPE 0 +#define IS2_FKL_TYPE 2 +#define IS2_FKO_FIRST (IS2_FKO_TYPE + IS2_FKL_TYPE) +#define IS2_FKL_FIRST 1 +#define IS2_FKO_PAG (IS2_FKO_FIRST + IS2_FKL_FIRST) +#define IS2_FKL_PAG 8 +#define IS2_FKO_IGR_PORT_MASK (IS2_FKO_PAG + IS2_FKL_PAG) +#define IS2_FKL_IGR_PORT_MASK (VCAP_PORT_CNT + 1) +#define IS2_FKO_SERVICE_FRM (IS2_FKO_IGR_PORT_MASK + IS2_FKL_IGR_PORT_MASK) +#define IS2_FKL_SERVICE_FRM 1 +#define IS2_FKO_HOST_MATCH (IS2_FKO_SERVICE_FRM + IS2_FKL_SERVICE_FRM) +#define IS2_FKL_HOST_MATCH 1 +#define IS2_FKO_L2_MC (IS2_FKO_HOST_MATCH + IS2_FKL_HOST_MATCH) +#define IS2_FKL_L2_MC 1 +#define IS2_FKO_L2_BC (IS2_FKO_L2_MC + IS2_FKL_L2_MC) +#define IS2_FKL_L2_BC 1 +#define IS2_FKO_VLAN_TAGGED 
(IS2_FKO_L2_BC + IS2_FKL_L2_BC) +#define IS2_FKL_VLAN_TAGGED 1 +#define IS2_FKO_VID (IS2_FKO_VLAN_TAGGED + IS2_FKL_VLAN_TAGGED) +#define IS2_FKL_VID 12 +#define IS2_FKO_DEI (IS2_FKO_VID + IS2_FKL_VID) +#define IS2_FKL_DEI 1 +#define IS2_FKO_PCP (IS2_FKO_DEI + IS2_FKL_DEI) +#define IS2_FKL_PCP 3 + +/* IS2 full key - IP6_TCP_UDP/IP6_OTHER common */ +#define IS2_FKO_L3_TTL_GT0 (IS2_FKO_PCP + IS2_FKL_PCP) +#define IS2_FKL_L3_TTL_GT0 1 +#define IS2_FKO_L3_TOS (IS2_FKO_L3_TTL_GT0 + IS2_FKL_L3_TTL_GT0) +#define IS2_FKL_L3_TOS 8 +#define IS2_FKO_L3_IP6_DIP (IS2_FKO_L3_TOS + IS2_FKL_L3_TOS) +#define IS2_FKL_L3_IP6_DIP 128 +#define IS2_FKO_L3_IP6_SIP (IS2_FKO_L3_IP6_DIP + IS2_FKL_L3_IP6_DIP) +#define IS2_FKL_L3_IP6_SIP 128 +#define IS2_FKO_DIP_EQ_SIP (IS2_FKO_L3_IP6_SIP + IS2_FKL_L3_IP6_SIP) +#define IS2_FKL_DIP_EQ_SIP 1 + +/* IS2 full key - IP6_TCP_UDP */ +#define IS2_FKO_IP6_TCP_UDP_TCP (IS2_FKO_DIP_EQ_SIP + IS2_FKL_DIP_EQ_SIP) +#define IS2_FKL_IP6_TCP_UDP_TCP 1 +#define IS2_FKO_IP6_TCP_UDP_L4_DPORT \ + (IS2_FKO_IP6_TCP_UDP_TCP + IS2_FKL_IP6_TCP_UDP_TCP) +#define IS2_FKL_IP6_TCP_UDP_L4_DPORT 16 +#define IS2_FKO_IP6_TCP_UDP_L4_SPORT \ + (IS2_FKO_IP6_TCP_UDP_L4_DPORT + IS2_FKL_IP6_TCP_UDP_L4_DPORT) +#define IS2_FKL_IP6_TCP_UDP_L4_SPORT 16 +#define IS2_FKO_IP6_TCP_UDP_L4_RNG \ + (IS2_FKO_IP6_TCP_UDP_L4_SPORT + IS2_FKL_IP6_TCP_UDP_L4_SPORT) +#define IS2_FKL_IP6_TCP_UDP_L4_RNG 8 +#define IS2_FKO_IP6_TCP_UDP_SPORT_EQ_DPORT \ + (IS2_FKO_IP6_TCP_UDP_L4_RNG + IS2_FKL_IP6_TCP_UDP_L4_RNG) +#define IS2_FKL_IP6_TCP_UDP_SPORT_EQ_DPORT 1 +#define IS2_FKO_IP6_TCP_UDP_SEQUENCE_EQ0 \ + (IS2_FKO_IP6_TCP_UDP_SPORT_EQ_DPORT + \ + IS2_FKL_IP6_TCP_UDP_SPORT_EQ_DPORT) +#define IS2_FKL_IP6_TCP_UDP_SEQUENCE_EQ0 1 +#define IS2_FKO_IP6_TCP_UDP_L4_FIN \ + (IS2_FKO_IP6_TCP_UDP_SEQUENCE_EQ0 + IS2_FKL_IP6_TCP_UDP_SEQUENCE_EQ0) +#define IS2_FKL_IP6_TCP_UDP_L4_FIN 1 +#define IS2_FKO_IP6_TCP_UDP_L4_SYN \ + (IS2_FKO_IP6_TCP_UDP_L4_FIN + IS2_FKL_IP6_TCP_UDP_L4_FIN) +#define IS2_FKL_IP6_TCP_UDP_L4_SYN 1 +#define IS2_FKO_IP6_TCP_UDP_L4_RST \ + (IS2_FKO_IP6_TCP_UDP_L4_SYN + IS2_FKL_IP6_TCP_UDP_L4_SYN) +#define IS2_FKL_IP6_TCP_UDP_L4_RST 1 +#define IS2_FKO_IP6_TCP_UDP_L4_PSH \ + (IS2_FKO_IP6_TCP_UDP_L4_RST + IS2_FKL_IP6_TCP_UDP_L4_RST) +#define IS2_FKL_IP6_TCP_UDP_L4_PSH 1 +#define IS2_FKO_IP6_TCP_UDP_L4_ACK \ + (IS2_FKO_IP6_TCP_UDP_L4_PSH + IS2_FKL_IP6_TCP_UDP_L4_PSH) +#define IS2_FKL_IP6_TCP_UDP_L4_ACK 1 +#define IS2_FKO_IP6_TCP_UDP_L4_URG \ + (IS2_FKO_IP6_TCP_UDP_L4_ACK + IS2_FKL_IP6_TCP_UDP_L4_ACK) +#define IS2_FKL_IP6_TCP_UDP_L4_URG 1 +#define IS2_FKO_IP6_TCP_UDP_L4_1588_DOM \ + (IS2_FKO_IP6_TCP_UDP_L4_URG + IS2_FKL_IP6_TCP_UDP_L4_URG) +#define IS2_FKL_IP6_TCP_UDP_L4_1588_DOM 8 +#define IS2_FKO_IP6_TCP_UDP_L4_1588_VER \ + (IS2_FKO_IP6_TCP_UDP_L4_1588_DOM + IS2_FKL_IP6_TCP_UDP_L4_1588_DOM) +#define IS2_FKL_IP6_TCP_UDP_L4_1588_VER 4 + +/* IS2 full key - IP6_OTHER */ +#define IS2_FKO_IP6_OTHER_L3_PROTO IS2_FKO_IP6_TCP_UDP_TCP +#define IS2_FKL_IP6_OTHER_L3_PROTO 8 +#define IS2_FKO_IP6_OTHER_L3_PAYLOAD \ + (IS2_FKO_IP6_OTHER_L3_PROTO + IS2_FKL_IP6_OTHER_L3_PROTO) +#define IS2_FKL_IP6_OTHER_L3_PAYLOAD 56 + +/* IS2 full key - CUSTOM */ +#define IS2_FKO_CUSTOM_CUSTOM_TYPE IS2_FKO_L3_TTL_GT0 +#define IS2_FKL_CUSTOM_CUSTOM_TYPE 1 +#define IS2_FKO_CUSTOM_CUSTOM \ + (IS2_FKO_CUSTOM_CUSTOM_TYPE + IS2_FKL_CUSTOM_CUSTOM_TYPE) +#define IS2_FKL_CUSTOM_CUSTOM 320 + +/* IS2 action - BASE_TYPE */ +#define IS2_AO_HIT_ME_ONCE 0 +#define IS2_AL_HIT_ME_ONCE 1 +#define IS2_AO_CPU_COPY_ENA (IS2_AO_HIT_ME_ONCE + IS2_AL_HIT_ME_ONCE) +#define 
IS2_AL_CPU_COPY_ENA 1 +#define IS2_AO_CPU_QU_NUM (IS2_AO_CPU_COPY_ENA + IS2_AL_CPU_COPY_ENA) +#define IS2_AL_CPU_QU_NUM 3 +#define IS2_AO_MASK_MODE (IS2_AO_CPU_QU_NUM + IS2_AL_CPU_QU_NUM) +#define IS2_AL_MASK_MODE 2 +#define IS2_AO_MIRROR_ENA (IS2_AO_MASK_MODE + IS2_AL_MASK_MODE) +#define IS2_AL_MIRROR_ENA 1 +#define IS2_AO_LRN_DIS (IS2_AO_MIRROR_ENA + IS2_AL_MIRROR_ENA) +#define IS2_AL_LRN_DIS 1 +#define IS2_AO_POLICE_ENA (IS2_AO_LRN_DIS + IS2_AL_LRN_DIS) +#define IS2_AL_POLICE_ENA 1 +#define IS2_AO_POLICE_IDX (IS2_AO_POLICE_ENA + IS2_AL_POLICE_ENA) +#define IS2_AL_POLICE_IDX 9 +#define IS2_AO_POLICE_VCAP_ONLY (IS2_AO_POLICE_IDX + IS2_AL_POLICE_IDX) +#define IS2_AL_POLICE_VCAP_ONLY 1 +#define IS2_AO_PORT_MASK (IS2_AO_POLICE_VCAP_ONLY + IS2_AL_POLICE_VCAP_ONLY) +#define IS2_AL_PORT_MASK VCAP_PORT_CNT +#define IS2_AO_REW_OP (IS2_AO_PORT_MASK + IS2_AL_PORT_MASK) +#define IS2_AL_REW_OP 9 +#define IS2_AO_LM_CNT_DIS (IS2_AO_REW_OP + IS2_AL_REW_OP) +#define IS2_AL_LM_CNT_DIS 1 +#define IS2_AO_ISDX_ENA \ + (IS2_AO_LM_CNT_DIS + IS2_AL_LM_CNT_DIS + 1) /* Reserved bit */ +#define IS2_AL_ISDX_ENA 1 +#define IS2_AO_ACL_ID (IS2_AO_ISDX_ENA + IS2_AL_ISDX_ENA) +#define IS2_AL_ACL_ID 6 + +/* IS2 action - SMAC_SIP */ +#define IS2_AO_SMAC_SIP_CPU_COPY_ENA 0 +#define IS2_AL_SMAC_SIP_CPU_COPY_ENA 1 +#define IS2_AO_SMAC_SIP_CPU_QU_NUM 1 +#define IS2_AL_SMAC_SIP_CPU_QU_NUM 3 +#define IS2_AO_SMAC_SIP_FWD_KILL_ENA 4 +#define IS2_AL_SMAC_SIP_FWD_KILL_ENA 1 +#define IS2_AO_SMAC_SIP_HOST_MATCH 5 +#define IS2_AL_SMAC_SIP_HOST_MATCH 1 + +#endif /* _OCELOT_VCAP_H_ */ diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index 4ad5109059e0..bac5be4d4f43 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -20,6 +20,7 @@ config NFP tristate "Netronome(R) NFP4000/NFP6000 NIC driver" depends on PCI && PCI_MSI depends on VXLAN || VXLAN=n + depends on TLS && TLS_DEVICE || TLS_DEVICE=n select NET_DEVLINK ---help--- This driver supports the Netronome(R) NFP4000/NFP6000 based diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 87bf784f8e8f..2805641965f3 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -16,6 +16,7 @@ nfp-objs := \ nfpcore/nfp_rtsym.o \ nfpcore/nfp_target.o \ ccm.o \ + ccm_mbox.o \ nfp_asm.o \ nfp_app.o \ nfp_app_nic.o \ @@ -34,6 +35,11 @@ nfp-objs := \ nfp_shared_buf.o \ nic/main.o +ifeq ($(CONFIG_TLS_DEVICE),y) +nfp-objs += \ + crypto/tls.o +endif + ifeq ($(CONFIG_NFP_APP_FLOWER),y) nfp-objs += \ flower/action.o \ diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index ff3913085665..23ebddfb9532 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -262,22 +262,12 @@ static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, } } +static LIST_HEAD(nfp_abm_block_cb_list); + int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr, - struct tc_block_offload *f) + struct flow_block_offload *f) { - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - nfp_abm_setup_tc_block_cb, - repr, repr, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, nfp_abm_setup_tc_block_cb, - repr); - return 0; - default: - return -EOPNOTSUPP; - } + return 
flow_block_cb_setup_simple(f, &nfp_abm_block_cb_list, + nfp_abm_setup_tc_block_cb, + repr, repr, true); } diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h index 49749c60885e..48746c9c6224 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/main.h +++ b/drivers/net/ethernet/netronome/nfp/abm/main.h @@ -247,7 +247,7 @@ int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink, int nfp_abm_setup_tc_gred(struct net_device *netdev, struct nfp_abm_link *alink, struct tc_gred_qopt_offload *opt); int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr, - struct tc_block_offload *opt); + struct flow_block_offload *opt); int nfp_abm_ctrl_read_params(struct nfp_abm_link *alink); int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index d4bf0e694541..4054b70d7719 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -623,6 +623,13 @@ static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) } static void +wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst) +{ + if (meta->flags & FLAG_INSN_DO_ZEXT) + wrp_immed(nfp_prog, reg_both(dst + 1), 0); +} + +static void wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm, enum nfp_relo_type relo) { @@ -858,7 +865,8 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) } static int -data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) +data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset, + u8 dst_gpr, int size) { unsigned int i; u16 shift, sz; @@ -881,14 +889,15 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } static int -data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, - swreg lreg, swreg rreg, int size, enum cmd_mode mode) +data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 dst_gpr, swreg lreg, swreg rreg, int size, + enum cmd_mode mode) { unsigned int i; u8 mask, sz; @@ -911,33 +920,34 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } static int -data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, - u8 dst_gpr, u8 size) +data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) { - return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset, - size, CMD_MODE_32b); + return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr), + offset, size, CMD_MODE_32b); } static int -data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, - u8 dst_gpr, u8 size) +data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) { swreg rega, regb; addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b); - return data_ld_host_order(nfp_prog, dst_gpr, rega, regb, + return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb, size, CMD_MODE_40b_BA); } static int -construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 
src, u8 size) +construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u16 src, u8 size) { swreg tmp_reg; @@ -953,10 +963,12 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ - return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); + return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size); } -static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +static int +construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u8 size) { swreg tmp_reg; @@ -967,7 +979,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); - return data_ld(nfp_prog, tmp_reg, 0, size); + return data_ld(nfp_prog, meta, tmp_reg, 0, size); } static int @@ -1204,7 +1216,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } if (clr_gpr && size < 8) - wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + wrp_zext(nfp_prog, meta, gpr); while (size) { u32 slice_end; @@ -1305,9 +1317,10 @@ wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, enum alu_op alu_op) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -1319,7 +1332,7 @@ wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2396,12 +2409,14 @@ static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst = meta->insn.dst_reg * 2; emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } -static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +static int +__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) { if (shift_amt) { /* Set signedness bit (MSB of result). 
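 * The shifter takes its sign from the MSB of the previous ALU * result (see the NOTE in ashr_reg() below), so dst is first run * through the ALU. 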
*/ @@ -2410,7 +2425,7 @@ static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, reg_b(dst), SHF_SC_R_SHF, shift_amt); } - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2425,7 +2440,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) umin = meta->umin_src; umax = meta->umax_src; if (umin == umax) - return __ashr_imm(nfp_prog, dst, umin); + return __ashr_imm(nfp_prog, meta, dst, umin); src = insn->src_reg * 2; /* NOTE: the first insn will set both indirect shift amount (source A) @@ -2434,7 +2449,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst)); emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, reg_b(dst), SHF_SC_R_SHF); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2444,15 +2459,17 @@ static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - return __ashr_imm(nfp_prog, dst, insn->imm); + return __ashr_imm(nfp_prog, meta, dst, insn->imm); } -static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +static int +__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) { if (shift_amt) emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, reg_b(dst), SHF_SC_R_SHF, shift_amt); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2461,7 +2478,7 @@ static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - return __shr_imm(nfp_prog, dst, insn->imm); + return __shr_imm(nfp_prog, meta, dst, insn->imm); } static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2474,22 +2491,24 @@ static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) umin = meta->umin_src; umax = meta->umax_src; if (umin == umax) - return __shr_imm(nfp_prog, dst, umin); + return __shr_imm(nfp_prog, meta, dst, umin); src = insn->src_reg * 2; emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, reg_b(dst), SHF_SC_R_SHF); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } -static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +static int +__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) { if (shift_amt) emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, reg_b(dst), SHF_SC_L_SHF, shift_amt); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2498,7 +2517,7 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - return __shl_imm(nfp_prog, dst, insn->imm); + return __shl_imm(nfp_prog, meta, dst, insn->imm); } static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2511,11 +2530,11 @@ static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) umin = meta->umin_src; umax = meta->umax_src; if (umin == umax) - return __shl_imm(nfp_prog, dst, umin); + return __shl_imm(nfp_prog, meta, dst, umin); src = insn->src_reg * 2; shl_reg64_lt32_low(nfp_prog, dst, src); - wrp_immed(nfp_prog, 
reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2577,34 +2596,34 @@ static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 1); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1); } static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 2); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2); } static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 4); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4); } static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 1); } static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 2); } static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 4); } @@ -2682,7 +2701,7 @@ mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); - return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2, + return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2, tmp_reg, meta->insn.dst_reg * 2, size); } @@ -2694,7 +2713,7 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); - return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2, + return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2, tmp_reg, meta->insn.dst_reg * 2, size); } @@ -2755,7 +2774,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); if (!len_mid) { - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } @@ -2763,7 +2782,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, if (size <= REG_WIDTH) { wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else { swreg src_hi = reg_xfer(idx + 2); @@ -2794,10 +2813,10 @@ mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, if (size < REG_WIDTH) { wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else if (size == REG_WIDTH) { wrp_mov(nfp_prog, dst_lo, src_lo); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else { swreg src_hi = reg_xfer(idx + 1); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 9c136da25221..1c9fb11470df 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -160,35 +160,19 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, return 0; } -static int nfp_bpf_setup_tc_block(struct net_device *netdev, - struct tc_block_offload *f) -{ - struct nfp_net *nn 
= netdev_priv(netdev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - nfp_bpf_setup_tc_block_cb, - nn, nn, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, - nfp_bpf_setup_tc_block_cb, - nn); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(nfp_bpf_block_cb_list); static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data) { + struct nfp_net *nn = netdev_priv(netdev); + switch (type) { case TC_SETUP_BLOCK: - return nfp_bpf_setup_tc_block(netdev, type_data); + return flow_block_cb_setup_simple(type_data, + &nfp_bpf_block_cb_list, + nfp_bpf_setup_tc_block_cb, + nn, nn, true); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index e54d1ac84df2..57d6ff51e980 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -238,6 +238,8 @@ struct nfp_bpf_reg_state { #define FLAG_INSN_SKIP_PREC_DEPENDENT BIT(4) /* Instruction is optimized by the verifier */ #define FLAG_INSN_SKIP_VERIFIER_OPT BIT(5) +/* Instruction needs to zero extend to high 32-bit */ +#define FLAG_INSN_DO_ZEXT BIT(6) #define FLAG_INSN_SKIP_MASK (FLAG_INSN_SKIP_NOOP | \ FLAG_INSN_SKIP_PREC_DEPENDENT | \ diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 36f56eb4cbe2..e92ee510fd52 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -744,6 +744,17 @@ continue_subprog: goto continue_subprog; } +static void nfp_bpf_insn_flag_zext(struct nfp_prog *nfp_prog, + struct bpf_insn_aux_data *aux) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (aux[meta->n].zext_dst) + meta->flags |= FLAG_INSN_DO_ZEXT; + } +} + int nfp_bpf_finalize(struct bpf_verifier_env *env) { struct bpf_subprog_info *info; @@ -784,6 +795,7 @@ int nfp_bpf_finalize(struct bpf_verifier_env *env) return -EOPNOTSUPP; } + nfp_bpf_insn_flag_zext(nfp_prog, env->insn_aux_data); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/ccm.c b/drivers/net/ethernet/netronome/nfp/ccm.c index 94476e41e261..71afd111bae3 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.c +++ b/drivers/net/ethernet/netronome/nfp/ccm.c @@ -7,9 +7,6 @@ #include "nfp_app.h" #include "nfp_net.h" -#define NFP_CCM_TYPE_REPLY_BIT 7 -#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req)) - #define ccm_warn(app, msg...) 
nn_dp_warn(&(app)->ctrl->dp, msg) #define NFP_CCM_TAG_ALLOC_SPAN (U16_MAX / 4) diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h index ac963b128203..a460c75522be 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.h +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -9,6 +9,7 @@ #include <linux/wait.h> struct nfp_app; +struct nfp_net; /* Firmware ABI */ @@ -21,15 +22,27 @@ enum nfp_ccm_type { NFP_CCM_TYPE_BPF_MAP_GETNEXT = 6, NFP_CCM_TYPE_BPF_MAP_GETFIRST = 7, NFP_CCM_TYPE_BPF_BPF_EVENT = 8, + NFP_CCM_TYPE_CRYPTO_RESET = 9, + NFP_CCM_TYPE_CRYPTO_ADD = 10, + NFP_CCM_TYPE_CRYPTO_DEL = 11, + NFP_CCM_TYPE_CRYPTO_UPDATE = 12, __NFP_CCM_TYPE_MAX, }; #define NFP_CCM_ABI_VERSION 1 +#define NFP_CCM_TYPE_REPLY_BIT 7 +#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req)) + struct nfp_ccm_hdr { - u8 type; - u8 ver; - __be16 tag; + union { + struct { + u8 type; + u8 ver; + __be16 tag; + }; + __be32 raw; + }; }; static inline u8 nfp_ccm_get_type(struct sk_buff *skb) @@ -41,15 +54,31 @@ static inline u8 nfp_ccm_get_type(struct sk_buff *skb) return hdr->type; } -static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb) +static inline __be16 __nfp_ccm_get_tag(struct sk_buff *skb) { struct nfp_ccm_hdr *hdr; hdr = (struct nfp_ccm_hdr *)skb->data; - return be16_to_cpu(hdr->tag); + return hdr->tag; +} + +static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb) +{ + return be16_to_cpu(__nfp_ccm_get_tag(skb)); } +#define NFP_NET_MBOX_TLV_TYPE GENMASK(31, 16) +#define NFP_NET_MBOX_TLV_LEN GENMASK(15, 0) + +enum nfp_ccm_mbox_tlv_type { + NFP_NET_MBOX_TLV_TYPE_UNKNOWN = 0, + NFP_NET_MBOX_TLV_TYPE_END = 1, + NFP_NET_MBOX_TLV_TYPE_MSG = 2, + NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP = 3, + NFP_NET_MBOX_TLV_TYPE_RESV = 4, +}; + /* Implementation */ /** @@ -71,7 +100,7 @@ struct nfp_ccm { u16 tag_alloc_last; struct sk_buff_head replies; - struct wait_queue_head wq; + wait_queue_head_t wq; }; int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app); @@ -80,4 +109,23 @@ void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb); struct sk_buff * nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb, enum nfp_ccm_type type, unsigned int reply_size); + +int nfp_ccm_mbox_alloc(struct nfp_net *nn); +void nfp_ccm_mbox_free(struct nfp_net *nn); +int nfp_ccm_mbox_init(struct nfp_net *nn); +void nfp_ccm_mbox_clean(struct nfp_net *nn); +bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size); +struct sk_buff * +nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags); +int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size, bool critical); +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size); +int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int max_reply_size); #endif diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c new file mode 100644 index 000000000000..f0783aa9e66e --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c @@ -0,0 +1,743 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#include <linux/bitfield.h> +#include <linux/io.h> +#include <linux/skbuff.h> + +#include "ccm.h" +#include "nfp_net.h" + +/* CCM messages via the mailbox. CMSGs get wrapped into simple TLVs + * and copied into the mailbox. Multiple messages can be copied to + * form a batch. Threads come in with CMSG formed in an skb, then + * enqueue that skb onto the request queue. If a thread's skb is first + * in the queue, that thread handles the mailbox operation. It copies + * up to 64 messages into the mailbox (making sure that both requests + * and replies will fit). After FW is done processing the batch it + * copies the data out and wakes waiting threads. + * If a thread is waiting, it either gets its message completed + * (response is copied into the same skb as the request, overwriting + * it), or becomes the first in queue. + * Completions and next-to-run are signaled via the control buffer + * to limit potential cache line bounces. + */ + +#define NFP_CCM_MBOX_BATCH_LIMIT 64 +#define NFP_CCM_TIMEOUT (NFP_NET_POLL_TIMEOUT * 1000) +#define NFP_CCM_MAX_QLEN 1024 + +enum nfp_net_mbox_cmsg_state { + NFP_NET_MBOX_CMSG_STATE_QUEUED, + NFP_NET_MBOX_CMSG_STATE_NEXT, + NFP_NET_MBOX_CMSG_STATE_BUSY, + NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND, + NFP_NET_MBOX_CMSG_STATE_DONE, +}; + +/** + * struct nfp_ccm_mbox_cmsg_cb - CCM mailbox specific info + * @state: processing state (/stage) of the message + * @err: error encountered during processing if any + * @max_len: max(request_len, reply_len) + * @exp_reply: expected reply length (0 means don't validate) + * @posted: the message was posted and nobody waits for the reply + */ +struct nfp_ccm_mbox_cmsg_cb { + enum nfp_net_mbox_cmsg_state state; + int err; + unsigned int max_len; + unsigned int exp_reply; + bool posted; +}; + +static u32 nfp_ccm_mbox_max_msg(struct nfp_net *nn) +{ + return round_down(nn->tlv_caps.mbox_len, 4) - + NFP_NET_CFG_MBOX_SIMPLE_VAL - /* common mbox command header */ + 4 * 2; /* Msg TLV plus End TLV headers */ +} + +static void +nfp_ccm_mbox_msg_init(struct sk_buff *skb, unsigned int exp_reply, int max_len) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_QUEUED; + cb->err = 0; + cb->max_len = max_len; + cb->exp_reply = exp_reply; + cb->posted = false; +} + +static int nfp_ccm_mbox_maxlen(const struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->max_len; +} + +static bool nfp_ccm_mbox_done(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state == NFP_NET_MBOX_CMSG_STATE_DONE; +} + +static bool nfp_ccm_mbox_in_progress(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state != NFP_NET_MBOX_CMSG_STATE_QUEUED && + cb->state != NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_set_busy(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_BUSY; +} + +static bool nfp_ccm_mbox_is_posted(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->posted; +} + +static void nfp_ccm_mbox_mark_posted(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->posted = true; +} + +static bool nfp_ccm_mbox_is_first(struct nfp_net *nn, struct sk_buff *skb) +{ + return skb_queue_is_first(&nn->mbox_cmsg.queue, skb); +} + +static bool nfp_ccm_mbox_should_run(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + 
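+ /* NEXT is set on the queue head via nfp_ccm_mbox_mark_next_runner() + * when the previous batch completes (or its runner gives up); only + * that skb's thread may start the next batch. + */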
+ return cb->state == NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_mark_next_runner(struct nfp_net *nn) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + skb = skb_peek(&nn->mbox_cmsg.queue); + if (!skb) + return; + + cb = (void *)skb->cb; + cb->state = NFP_NET_MBOX_CMSG_STATE_NEXT; + if (cb->posted) + queue_work(nn->mbox_cmsg.workq, &nn->mbox_cmsg.runq_work); +} + +static void +nfp_ccm_mbox_write_tlv(struct nfp_net *nn, u32 off, u32 type, u32 len) +{ + nn_writel(nn, off, + FIELD_PREP(NFP_NET_MBOX_TLV_TYPE, type) | + FIELD_PREP(NFP_NET_MBOX_TLV_LEN, len)); +} + +static void nfp_ccm_mbox_copy_in(struct nfp_net *nn, struct sk_buff *last) +{ + struct sk_buff *skb; + int reserve, i, cnt; + __be32 *data; + u32 off, len; + + off = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_MSG, + skb->len); + off += 4; + + /* Write data word by word, skb->data should be aligned */ + data = (__be32 *)skb->data; + cnt = skb->len / 4; + for (i = 0 ; i < cnt; i++) { + nn_writel(nn, off, be32_to_cpu(data[i])); + off += 4; + } + if (skb->len & 3) { + __be32 tmp = 0; + + memcpy(&tmp, &data[i], skb->len & 3); + nn_writel(nn, off, be32_to_cpu(tmp)); + off += 4; + } + + /* Reserve space if reply is bigger */ + len = round_up(skb->len, 4); + reserve = nfp_ccm_mbox_maxlen(skb) - len; + if (reserve > 0) { + nfp_ccm_mbox_write_tlv(nn, off, + NFP_NET_MBOX_TLV_TYPE_RESV, + reserve); + off += 4 + reserve; + } + + if (skb == last) + break; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } + + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_END, 0); +} + +static struct sk_buff * +nfp_ccm_mbox_find_req(struct nfp_net *nn, __be16 tag, struct sk_buff *last) +{ + struct sk_buff *skb; + + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + if (__nfp_ccm_get_tag(skb) == tag) + return skb; + + if (skb == last) + return NULL; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } +} + +static void nfp_ccm_mbox_copy_out(struct nfp_net *nn, struct sk_buff *last) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + u8 __iomem *data, *end; + struct sk_buff *skb; + + data = nn->dp.ctrl_bar + nn->tlv_caps.mbox_off + + NFP_NET_CFG_MBOX_SIMPLE_VAL; + end = data + nn->tlv_caps.mbox_len; + + while (true) { + unsigned int length, offset, type; + struct nfp_ccm_hdr hdr; + u32 tlv_hdr; + + tlv_hdr = readl(data); + type = FIELD_GET(NFP_NET_MBOX_TLV_TYPE, tlv_hdr); + length = FIELD_GET(NFP_NET_MBOX_TLV_LEN, tlv_hdr); + offset = data - nn->dp.ctrl_bar; + + /* Advance past the header */ + data += 4; + + if (data + length > end) { + nn_dp_warn(&nn->dp, "mailbox oversized TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (type == NFP_NET_MBOX_TLV_TYPE_END) + break; + if (type == NFP_NET_MBOX_TLV_TYPE_RESV) + goto next_tlv; + if (type != NFP_NET_MBOX_TLV_TYPE_MSG && + type != NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, "mailbox unknown TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (length < 4) { + nn_dp_warn(&nn->dp, "mailbox msg too short to contain header TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + hdr.raw = cpu_to_be32(readl(data)); + + skb = nfp_ccm_mbox_find_req(nn, hdr.tag, last); + if (!skb) { + nn_dp_warn(&nn->dp, "mailbox request not found:%u\n", + be16_to_cpu(hdr.tag)); + break; + } + cb = (void *)skb->cb; + + if (type == NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, + "mailbox 
msg not supported type:%d\n", + nfp_ccm_get_type(skb)); + cb->err = -EIO; + goto next_tlv; + } + + if (hdr.type != __NFP_CCM_REPLY(nfp_ccm_get_type(skb))) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong type:%u expected:%lu\n", + hdr.type, + __NFP_CCM_REPLY(nfp_ccm_get_type(skb))); + cb->err = -EIO; + goto next_tlv; + } + if (cb->exp_reply && length != cb->exp_reply) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong size type:%u expected:%u have:%u\n", + hdr.type, length, cb->exp_reply); + cb->err = -EIO; + goto next_tlv; + } + if (length > cb->max_len) { + nn_dp_warn(&nn->dp, "mailbox msg oversized reply type:%u max:%u have:%u\n", + hdr.type, cb->max_len, length); + cb->err = -EIO; + goto next_tlv; + } + + if (!cb->posted) { + __be32 *skb_data; + int i, cnt; + + if (length <= skb->len) + __skb_trim(skb, length); + else + skb_put(skb, length - skb->len); + + /* We overcopy here slightly, but that's okay, + * the skb is large enough, and the garbage will + * be ignored (beyond skb->len). + */ + skb_data = (__be32 *)skb->data; + memcpy(skb_data, &hdr, 4); + + cnt = DIV_ROUND_UP(length, 4); + for (i = 1 ; i < cnt; i++) + skb_data[i] = cpu_to_be32(readl(data + i * 4)); + } + + cb->state = NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND; +next_tlv: + data += round_up(length, 4); + if (data + 4 > end) { + nn_dp_warn(&nn->dp, + "reached end of MBOX without END TLV\n"); + break; + } + } + + smp_wmb(); /* order the skb->data vs. cb->state */ + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + if (cb->state != NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND) { + cb->err = -ENOENT; + smp_wmb(); /* order the cb->err vs. cb->state */ + } + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + + if (cb->posted) { + if (cb->err) + nn_dp_warn(&nn->dp, + "mailbox posted msg failed type:%u err:%d\n", + nfp_ccm_get_type(skb), cb->err); + dev_consume_skb_any(skb); + } + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void +nfp_ccm_mbox_mark_all_err(struct nfp_net *nn, struct sk_buff *last, int err) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + cb->err = err; + smp_wmb(); /* order the cb->err vs. cb->state */ + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void nfp_ccm_mbox_run_queue_unlock(struct nfp_net *nn) + __releases(&nn->mbox_cmsg.queue.lock) +{ + int space = nn->tlv_caps.mbox_len - NFP_NET_CFG_MBOX_SIMPLE_VAL; + struct sk_buff *skb, *last; + int cnt, err; + + space -= 4; /* for End TLV */ + + /* First skb must fit, because it's ours and we checked it fits */ + cnt = 1; + last = skb = __skb_peek(&nn->mbox_cmsg.queue); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + + while (!skb_queue_is_last(&nn->mbox_cmsg.queue, last)) { + skb = skb_queue_next(&nn->mbox_cmsg.queue, last); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + if (space < 0) + break; + last = skb; + nfp_ccm_mbox_set_busy(skb); + cnt++; + if (cnt == NFP_CCM_MBOX_BATCH_LIMIT) + break; + } + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + /* Now we own all skb's marked in progress, new requests may arrive + * at the end of the queue. 
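+ * The batch ends at "last"; anything queued behind it will be + * handled by the next runner.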
+ */ + + nn_ctrl_bar_lock(nn); + + nfp_ccm_mbox_copy_in(nn, last); + + err = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_TLV_CMSG); + if (!err) + nfp_ccm_mbox_copy_out(nn, last); + else + nfp_ccm_mbox_mark_all_err(nn, last, -EIO); + + nn_ctrl_bar_unlock(nn); + + wake_up_all(&nn->mbox_cmsg.wq); +} + +static int nfp_ccm_mbox_skb_return(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + if (cb->err) + dev_kfree_skb_any(skb); + return cb->err; +} + +/* If the wait timed out but the command is already in progress we have + * to wait until it finishes. Runners have ownership of the skbs marked + * as busy. + */ +static int +nfp_ccm_mbox_unlink_unlock(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type) + __releases(&nn->mbox_cmsg.queue.lock) +{ + bool was_first; + + if (nfp_ccm_mbox_in_progress(skb)) { + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + wait_event(nn->mbox_cmsg.wq, nfp_ccm_mbox_done(skb)); + smp_rmb(); /* pairs with smp_wmb() after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + was_first = nfp_ccm_mbox_should_run(nn, skb); + __skb_unlink(skb, &nn->mbox_cmsg.queue); + if (was_first) + nfp_ccm_mbox_mark_next_runner(nn); + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + if (was_first) + wake_up_all(&nn->mbox_cmsg.wq); + + nn_dp_warn(&nn->dp, "time out waiting for mbox response to 0x%02x\n", + type); + return -ETIMEDOUT; +} + +static int +nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, unsigned int max_reply_size, + gfp_t flags) +{ + const unsigned int mbox_max = nfp_ccm_mbox_max_msg(nn); + unsigned int max_len; + ssize_t undersize; + int err; + + if (unlikely(!(nn->tlv_caps.mbox_cmsg_types & BIT(type)))) { + nn_dp_warn(&nn->dp, + "message type %d not supported by mailbox\n", type); + return -EINVAL; + } + + /* If the reply size is unknown assume it will take the entire + * mailbox; the callers should do their best for this to never + * happen. + */ + if (!max_reply_size) + max_reply_size = mbox_max; + max_reply_size = round_up(max_reply_size, 4); + + /* Make sure we can fit the entire reply into the skb, + * and that we don't have to slow down the mbox handler + * with allocations.
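+ * Any necessary tail room is allocated here, in the caller's + * context, so the runner never allocates while copying out.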
+ */ + undersize = max_reply_size - (skb_end_pointer(skb) - skb->data); + if (undersize > 0) { + err = pskb_expand_head(skb, 0, undersize, flags); + if (err) { + nn_dp_warn(&nn->dp, + "can't allocate reply buffer for mailbox\n"); + return err; + } + } + + /* Make sure that request and response both fit into the mailbox */ + max_len = max(max_reply_size, round_up(skb->len, 4)); + if (max_len > mbox_max) { + nn_dp_warn(&nn->dp, + "message too big for the mailbox: %u/%u vs %u\n", + skb->len, max_reply_size, mbox_max); + return -EMSGSIZE; + } + + nfp_ccm_mbox_msg_init(skb, reply_size, max_len); + + return 0; +} + +static int +nfp_ccm_mbox_msg_enqueue(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, bool critical) +{ + struct nfp_ccm_hdr *hdr; + + assert_spin_locked(&nn->mbox_cmsg.queue.lock); + + if (!critical && nn->mbox_cmsg.queue.qlen >= NFP_CCM_MAX_QLEN) { + nn_dp_warn(&nn->dp, "mailbox request queue too long\n"); + return -EBUSY; + } + + hdr = (void *)skb->data; + hdr->ver = NFP_CCM_ABI_VERSION; + hdr->type = type; + hdr->tag = cpu_to_be16(nn->mbox_cmsg.tag++); + + __skb_queue_tail(&nn->mbox_cmsg.queue, skb); + + return 0; +} + +int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size, bool critical) +{ + int err; + + err = nfp_ccm_mbox_msg_prepare(nn, skb, type, reply_size, + max_reply_size, GFP_KERNEL); + if (err) + goto err_free_skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, critical); + if (err) + goto err_unlock; + + /* First in queue takes the mailbox lock and processes the batch */ + if (!nfp_ccm_mbox_is_first(nn, skb)) { + bool to; + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + to = !wait_event_timeout(nn->mbox_cmsg.wq, + nfp_ccm_mbox_done(skb) || + nfp_ccm_mbox_should_run(nn, skb), + msecs_to_jiffies(NFP_CCM_TIMEOUT)); + + /* fast path for those completed by another thread */ + if (nfp_ccm_mbox_done(skb)) { + smp_rmb(); /* pairs with wmb after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + if (!nfp_ccm_mbox_is_first(nn, skb)) { + WARN_ON(!to); + + err = nfp_ccm_mbox_unlink_unlock(nn, skb, type); + if (err) + goto err_free_skb; + return 0; + } + } + + /* run queue expects the lock held */ + nfp_ccm_mbox_run_queue_unlock(nn); + return nfp_ccm_mbox_skb_return(skb); + +err_unlock: + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +err_free_skb: + dev_kfree_skb_any(skb); + return err; +} + +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size) +{ + return __nfp_ccm_mbox_communicate(nn, skb, type, reply_size, + max_reply_size, false); +} + +static void nfp_ccm_mbox_post_runq_work(struct work_struct *work) +{ + struct sk_buff *skb; + struct nfp_net *nn; + + nn = container_of(work, struct nfp_net, mbox_cmsg.runq_work); + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + skb = __skb_peek(&nn->mbox_cmsg.queue); + if (WARN_ON(!skb || !nfp_ccm_mbox_is_posted(skb) || + !nfp_ccm_mbox_should_run(nn, skb))) { + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + return; + } + + nfp_ccm_mbox_run_queue_unlock(nn); +} + +static void nfp_ccm_mbox_post_wait_work(struct work_struct *work) +{ + struct sk_buff *skb; + struct nfp_net *nn; + int err; + + nn = container_of(work, struct nfp_net, mbox_cmsg.wait_work); + + skb = skb_peek(&nn->mbox_cmsg.queue); + if (WARN_ON(!skb ||
!nfp_ccm_mbox_is_posted(skb))) + /* Should never happen so it's unclear what to do here.. */ + goto exit_unlock_wake; + + err = nfp_net_mbox_reconfig_wait_posted(nn); + if (!err) + nfp_ccm_mbox_copy_out(nn, skb); + else + nfp_ccm_mbox_mark_all_err(nn, skb, -EIO); +exit_unlock_wake: + nn_ctrl_bar_unlock(nn); + wake_up_all(&nn->mbox_cmsg.wq); +} + +int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int max_reply_size) +{ + int err; + + err = nfp_ccm_mbox_msg_prepare(nn, skb, type, 0, max_reply_size, + GFP_ATOMIC); + if (err) + goto err_free_skb; + + nfp_ccm_mbox_mark_posted(skb); + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, false); + if (err) + goto err_unlock; + + if (nfp_ccm_mbox_is_first(nn, skb)) { + if (nn_ctrl_bar_trylock(nn)) { + nfp_ccm_mbox_copy_in(nn, skb); + nfp_net_mbox_reconfig_post(nn, + NFP_NET_CFG_MBOX_CMD_TLV_CMSG); + queue_work(nn->mbox_cmsg.workq, + &nn->mbox_cmsg.wait_work); + } else { + nfp_ccm_mbox_mark_next_runner(nn); + } + } + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + return 0; + +err_unlock: + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +err_free_skb: + dev_kfree_skb_any(skb); + return err; +} + +struct sk_buff * +nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags) +{ + unsigned int max_size; + struct sk_buff *skb; + + if (!reply_size) + max_size = nfp_ccm_mbox_max_msg(nn); + else + max_size = max(req_size, reply_size); + max_size = round_up(max_size, 4); + + skb = alloc_skb(max_size, flags); + if (!skb) + return NULL; + + skb_put(skb, req_size); + + return skb; +} + +bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size) +{ + return nfp_ccm_mbox_max_msg(nn) >= size; +} + +int nfp_ccm_mbox_init(struct nfp_net *nn) +{ + return 0; +} + +void nfp_ccm_mbox_clean(struct nfp_net *nn) +{ + drain_workqueue(nn->mbox_cmsg.workq); +} + +int nfp_ccm_mbox_alloc(struct nfp_net *nn) +{ + skb_queue_head_init(&nn->mbox_cmsg.queue); + init_waitqueue_head(&nn->mbox_cmsg.wq); + INIT_WORK(&nn->mbox_cmsg.wait_work, nfp_ccm_mbox_post_wait_work); + INIT_WORK(&nn->mbox_cmsg.runq_work, nfp_ccm_mbox_post_runq_work); + + nn->mbox_cmsg.workq = alloc_workqueue("nfp-ccm-mbox", WQ_UNBOUND, 0); + if (!nn->mbox_cmsg.workq) + return -ENOMEM; + return 0; +} + +void nfp_ccm_mbox_free(struct nfp_net *nn) +{ + destroy_workqueue(nn->mbox_cmsg.workq); + WARN_ON(!skb_queue_empty(&nn->mbox_cmsg.queue)); +} diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h new file mode 100644 index 000000000000..60372ddf69f0 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#ifndef NFP_CRYPTO_H +#define NFP_CRYPTO_H 1 + +struct nfp_net_tls_offload_ctx { + __be32 fw_handle[2]; + + u8 rx_end[0]; + /* Tx only fields follow - Rx side does not have enough driver state + * to fit these + */ + + u32 next_seq; +}; + +#ifdef CONFIG_TLS_DEVICE +int nfp_net_tls_init(struct nfp_net *nn); +#else +static inline int nfp_net_tls_init(struct nfp_net *nn) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h new file mode 100644 index 000000000000..67413d946c4a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef NFP_CRYPTO_FW_H +#define NFP_CRYPTO_FW_H 1 + +#include "../ccm.h" + +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 + +struct nfp_crypto_reply_simple { + struct nfp_ccm_hdr hdr; + __be32 error; +}; + +struct nfp_crypto_req_reset { + struct nfp_ccm_hdr hdr; + __be32 ep_id; +}; + +#define NFP_NET_TLS_IPVER GENMASK(15, 12) +#define NFP_NET_TLS_VLAN GENMASK(11, 0) +#define NFP_NET_TLS_VLAN_UNUSED 4095 + +struct nfp_crypto_req_add_front { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + u8 key_len; + __be16 ipver_vlan __packed; + u8 l4_proto; +#define NFP_NET_TLS_NON_ADDR_KEY_LEN 8 + u8 l3_addrs[0]; +}; + +struct nfp_crypto_req_add_back { + __be16 src_port; + __be16 dst_port; + __be32 key[8]; + __be32 salt; + __be32 iv[2]; + __be32 counter; + __be32 rec_no[2]; + __be32 tcp_seq; +}; + +struct nfp_crypto_req_add_v4 { + struct nfp_crypto_req_add_front front; + __be32 src_ip; + __be32 dst_ip; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_req_add_v6 { + struct nfp_crypto_req_add_front front; + __be32 src_ip[4]; + __be32 dst_ip[4]; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_reply_add { + struct nfp_ccm_hdr hdr; + __be32 error; + __be32 handle[2]; +}; + +struct nfp_crypto_req_del { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + __be32 handle[2]; +}; + +struct nfp_crypto_req_update { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + __be32 handle[2]; + __be32 rec_no[2]; + __be32 tcp_seq; +}; +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c new file mode 100644 index 000000000000..96a96b35c0ca --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#include <linux/bitfield.h> +#include <linux/ipv6.h> +#include <linux/skbuff.h> +#include <linux/string.h> +#include <net/tls.h> + +#include "../ccm.h" +#include "../nfp_net.h" +#include "crypto.h" +#include "fw.h" + +#define NFP_NET_TLS_CCM_MBOX_OPS_MASK \ + (BIT(NFP_CCM_TYPE_CRYPTO_RESET) | \ + BIT(NFP_CCM_TYPE_CRYPTO_ADD) | \ + BIT(NFP_CCM_TYPE_CRYPTO_DEL) | \ + BIT(NFP_CCM_TYPE_CRYPTO_UPDATE)) + +#define NFP_NET_TLS_OPCODE_MASK_RX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC) + +#define NFP_NET_TLS_OPCODE_MASK_TX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC) + +#define NFP_NET_TLS_OPCODE_MASK \ + (NFP_NET_TLS_OPCODE_MASK_RX | NFP_NET_TLS_OPCODE_MASK_TX) + +static void nfp_net_crypto_set_op(struct nfp_net *nn, u8 opcode, bool on) +{ + u32 off, val; + + off = nn->tlv_caps.crypto_enable_off + round_down(opcode / 8, 4); + + val = nn_readl(nn, off); + if (on) + val |= BIT(opcode & 31); + else + val &= ~BIT(opcode & 31); + nn_writel(nn, off, val); +} + +static bool +__nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + u8 opcode; + int cnt; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + nn->ktls_tx_conn_cnt += add; + cnt = nn->ktls_tx_conn_cnt; + nn->dp.ktls_tx = !!nn->ktls_tx_conn_cnt; + } else { + opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + nn->ktls_rx_conn_cnt += add; + cnt = nn->ktls_rx_conn_cnt; + } + + /* Care only about 0 -> 1 and 1 -> 0 transitions */ + if (cnt > 1) + return false; + + nfp_net_crypto_set_op(nn, opcode, cnt); + return true; +} + +static int +nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + int ret = 0; + + /* Use the BAR lock to protect the connection counts */ + nn_ctrl_bar_lock(nn); + if (__nfp_net_tls_conn_cnt_changed(nn, add, direction)) { + ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + /* Undo the cnt adjustment if failed */ + if (ret) + __nfp_net_tls_conn_cnt_changed(nn, -add, direction); + } + nn_ctrl_bar_unlock(nn); + + return ret; +} + +static int +nfp_net_tls_conn_add(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, 1, direction); +} + +static int +nfp_net_tls_conn_remove(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, -1, direction); +} + +static struct sk_buff * +nfp_net_tls_alloc_simple(struct nfp_net *nn, size_t req_sz, gfp_t flags) +{ + return nfp_ccm_mbox_msg_alloc(nn, req_sz, + sizeof(struct nfp_crypto_reply_simple), + flags); +} + +static int +nfp_net_tls_communicate_simple(struct nfp_net *nn, struct sk_buff *skb, + const char *name, enum nfp_ccm_type type) +{ + struct nfp_crypto_reply_simple *reply; + int err; + + err = __nfp_ccm_mbox_communicate(nn, skb, type, + sizeof(*reply), sizeof(*reply), + type == NFP_CCM_TYPE_CRYPTO_DEL); + if (err) { + nn_dp_warn(&nn->dp, "failed to %s TLS: %d\n", name, err); + return err; + } + + reply = (void *)skb->data; + err = -be32_to_cpu(reply->error); + if (err) + nn_dp_warn(&nn->dp, "failed to %s TLS, fw replied: %d\n", + name, err); + dev_consume_skb_any(skb); + + return err; +} + +static void nfp_net_tls_del_fw(struct nfp_net *nn, __be32 *fw_handle) +{ + struct nfp_crypto_req_del *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return; + + req = (void *)skb->data; + req->ep_id = 0; + memcpy(req->handle, fw_handle, sizeof(req->handle)); + + 
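/* Fire-and-forget: this helper returns void, so a failed delete is
+ * only logged inside nfp_net_tls_communicate_simple() below.
+ */
+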
nfp_net_tls_communicate_simple(nn, skb, "delete", + NFP_CCM_TYPE_CRYPTO_DEL); +} + +static void +nfp_net_tls_set_ipver_vlan(struct nfp_crypto_req_add_front *front, u8 ipver) +{ + front->ipver_vlan = cpu_to_be16(FIELD_PREP(NFP_NET_TLS_IPVER, ipver) | + FIELD_PREP(NFP_NET_TLS_VLAN, + NFP_NET_TLS_VLAN_UNUSED)); +} + +static void +nfp_net_tls_assign_conn_id(struct nfp_net *nn, + struct nfp_crypto_req_add_front *front) +{ + u32 len; + u64 id; + + id = atomic64_inc_return(&nn->ktls_conn_id_gen); + len = front->key_len - NFP_NET_TLS_NON_ADDR_KEY_LEN; + + memcpy(front->l3_addrs, &id, sizeof(id)); + memset(front->l3_addrs + sizeof(id), 0, len - sizeof(id)); +} + +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv4(struct nfp_net *nn, struct nfp_crypto_req_add_v4 *req, + struct sock *sk, int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + req->front.key_len += sizeof(__be32) * 2; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + nfp_net_tls_assign_conn_id(nn, &req->front); + } else { + req->src_ip = inet->inet_daddr; + req->dst_ip = inet->inet_saddr; + } + + return &req->back; +} + +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv6(struct nfp_net *nn, struct nfp_crypto_req_add_v6 *req, + struct sock *sk, int direction) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *np = inet6_sk(sk); + + req->front.key_len += sizeof(struct in6_addr) * 2; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + nfp_net_tls_assign_conn_id(nn, &req->front); + } else { + memcpy(req->src_ip, &sk->sk_v6_daddr, sizeof(req->src_ip)); + memcpy(req->dst_ip, &np->saddr, sizeof(req->dst_ip)); + } + +#endif + return &req->back; +} + +static void +nfp_net_tls_set_l4(struct nfp_crypto_req_add_front *front, + struct nfp_crypto_req_add_back *back, struct sock *sk, + int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + front->l4_proto = IPPROTO_TCP; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + back->src_port = 0; + back->dst_port = 0; + } else { + back->src_port = inet->inet_dport; + back->dst_port = inet->inet_sport; + } +} + +static u8 nfp_tls_1_2_dir_to_opcode(enum tls_offload_ctx_dir direction) +{ + switch (direction) { + case TLS_OFFLOAD_CTX_DIR_TX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + case TLS_OFFLOAD_CTX_DIR_RX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static bool +nfp_net_cipher_supported(struct nfp_net *nn, u16 cipher_type, + enum tls_offload_ctx_dir direction) +{ + u8 bit; + + switch (cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + else + bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + break; + default: + return false; + } + + return nn->tlv_caps.crypto_ops & BIT(bit); +} + +static int +nfp_net_tls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct tls12_crypto_info_aes_gcm_128 *tls_ci; + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_crypto_req_add_front *front; + struct nfp_net_tls_offload_ctx *ntls; + struct nfp_crypto_req_add_back *back; + struct nfp_crypto_reply_add *reply; + struct sk_buff *skb; + size_t req_sz; + void *req; + bool ipv6; + int err; + + BUILD_BUG_ON(sizeof(struct nfp_net_tls_offload_ctx) > + TLS_DRIVER_STATE_SIZE_TX); + BUILD_BUG_ON(offsetof(struct nfp_net_tls_offload_ctx, rx_end) > + TLS_DRIVER_STATE_SIZE_RX); + + if (!nfp_net_cipher_supported(nn, 
crypto_info->cipher_type, direction)) + return -EOPNOTSUPP; + + switch (sk->sk_family) { +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (sk->sk_ipv6only || + ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { + req_sz = sizeof(struct nfp_crypto_req_add_v6); + ipv6 = true; + break; + } +#endif + /* fall through */ + case AF_INET: + req_sz = sizeof(struct nfp_crypto_req_add_v4); + ipv6 = false; + break; + default: + return -EOPNOTSUPP; + } + + err = nfp_net_tls_conn_add(nn, direction); + if (err) + return err; + + skb = nfp_ccm_mbox_msg_alloc(nn, req_sz, sizeof(*reply), GFP_KERNEL); + if (!skb) { + err = -ENOMEM; + goto err_conn_remove; + } + + front = (void *)skb->data; + front->ep_id = 0; + front->key_len = NFP_NET_TLS_NON_ADDR_KEY_LEN; + front->opcode = nfp_tls_1_2_dir_to_opcode(direction); + memset(front->resv, 0, sizeof(front->resv)); + + nfp_net_tls_set_ipver_vlan(front, ipv6 ? 6 : 4); + + req = (void *)skb->data; + if (ipv6) + back = nfp_net_tls_set_ipv6(nn, req, sk, direction); + else + back = nfp_net_tls_set_ipv4(nn, req, sk, direction); + + nfp_net_tls_set_l4(front, back, sk, direction); + + back->counter = 0; + back->tcp_seq = cpu_to_be32(start_offload_tcp_sn); + + tls_ci = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + memcpy(back->key, tls_ci->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memset(&back->key[TLS_CIPHER_AES_GCM_128_KEY_SIZE / 4], 0, + sizeof(back->key) - TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(back->iv, tls_ci->iv, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(&back->salt, tls_ci->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(back->rec_no, tls_ci->rec_seq, sizeof(tls_ci->rec_seq)); + + /* Get an extra ref on the skb so we can wipe the key after */ + skb_get(skb); + + err = nfp_ccm_mbox_communicate(nn, skb, NFP_CCM_TYPE_CRYPTO_ADD, + sizeof(*reply), sizeof(*reply)); + reply = (void *)skb->data; + + /* We depend on CCM MBOX code not reallocating skb we sent + * so we can clear the key material out of the memory. 
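+ * (the WARN_ON_ONCE() bounds checks below merely confirm that 'back'
+ * still sits inside the skb data before memzero_explicit() wipes it)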
+ */ + if (!WARN_ON_ONCE((u8 *)back < skb->head || + (u8 *)back > skb_end_pointer(skb)) && + !WARN_ON_ONCE((u8 *)&reply[1] > (u8 *)back)) + memzero_explicit(back, sizeof(*back)); + dev_consume_skb_any(skb); /* the extra ref from skb_get() above */ + + if (err) { + nn_dp_warn(&nn->dp, "failed to add TLS: %d (%d)\n", + err, direction == TLS_OFFLOAD_CTX_DIR_TX); + /* communicate frees skb on error */ + goto err_conn_remove; + } + + err = -be32_to_cpu(reply->error); + if (err) { + if (err == -ENOSPC) { + if (!atomic_fetch_inc(&nn->ktls_no_space)) + nn_info(nn, "HW TLS table full\n"); + } else { + nn_dp_warn(&nn->dp, + "failed to add TLS, FW replied: %d\n", err); + } + goto err_free_skb; + } + + if (!reply->handle[0] && !reply->handle[1]) { + nn_dp_warn(&nn->dp, "FW returned NULL handle\n"); + err = -EINVAL; + goto err_fw_remove; + } + + ntls = tls_driver_ctx(sk, direction); + memcpy(ntls->fw_handle, reply->handle, sizeof(ntls->fw_handle)); + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + ntls->next_seq = start_offload_tcp_sn; + dev_consume_skb_any(skb); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + return 0; + + tls_offload_rx_resync_set_type(sk, + TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + return 0; + +err_fw_remove: + nfp_net_tls_del_fw(nn, reply->handle); +err_free_skb: + dev_consume_skb_any(skb); +err_conn_remove: + nfp_net_tls_conn_remove(nn, direction); + return err; +} + +static void +nfp_net_tls_del(struct net_device *netdev, struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + + nfp_net_tls_conn_remove(nn, direction); + + ntls = __tls_driver_ctx(tls_ctx, direction); + nfp_net_tls_del_fw(nn, ntls->fw_handle); +} + +static int +nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, + u8 *rcd_sn, enum tls_offload_ctx_dir direction) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct nfp_crypto_req_update *req; + struct sk_buff *skb; + gfp_t flags; + int err; + + flags = direction == TLS_OFFLOAD_CTX_DIR_TX ? 
GFP_KERNEL : GFP_ATOMIC; + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), flags); + if (!skb) + return -ENOMEM; + + ntls = tls_driver_ctx(sk, direction); + req = (void *)skb->data; + req->ep_id = 0; + req->opcode = nfp_tls_1_2_dir_to_opcode(direction); + memset(req->resv, 0, sizeof(req->resv)); + memcpy(req->handle, ntls->fw_handle, sizeof(ntls->fw_handle)); + req->tcp_seq = cpu_to_be32(seq); + memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no)); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + err = nfp_net_tls_communicate_simple(nn, skb, "sync", + NFP_CCM_TYPE_CRYPTO_UPDATE); + if (err) + return err; + ntls->next_seq = seq; + } else { + nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE, + sizeof(struct nfp_crypto_reply_simple)); + } + + return 0; +} + +static const struct tlsdev_ops nfp_net_tls_ops = { + .tls_dev_add = nfp_net_tls_add, + .tls_dev_del = nfp_net_tls_del, + .tls_dev_resync = nfp_net_tls_resync, +}; + +static int nfp_net_tls_reset(struct nfp_net *nn) +{ + struct nfp_crypto_req_reset *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + req = (void *)skb->data; + req->ep_id = 0; + + return nfp_net_tls_communicate_simple(nn, skb, "reset", + NFP_CCM_TYPE_CRYPTO_RESET); +} + +int nfp_net_tls_init(struct nfp_net *nn) +{ + struct net_device *netdev = nn->dp.netdev; + int err; + + if (!(nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK)) + return 0; + + if ((nn->tlv_caps.mbox_cmsg_types & NFP_NET_TLS_CCM_MBOX_OPS_MASK) != + NFP_NET_TLS_CCM_MBOX_OPS_MASK) + return 0; + + if (!nfp_ccm_mbox_fits(nn, sizeof(struct nfp_crypto_req_add_v6))) { + nn_warn(nn, "disabling TLS offload - mbox too small: %d\n", + nn->tlv_caps.mbox_len); + return 0; + } + + err = nfp_net_tls_reset(nn); + if (err) + return err; + + nn_ctrl_bar_lock(nn); + nn_writel(nn, nn->tlv_caps.crypto_enable_off, 0); + err = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + nn_ctrl_bar_unlock(nn); + if (err) + return err; + + if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_RX) { + netdev->hw_features |= NETIF_F_HW_TLS_RX; + netdev->features |= NETIF_F_HW_TLS_RX; + } + if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_TX) { + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + } + + netdev->tlsdev_ops = &nfp_net_tls_ops; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index c56e31d9f8a4..5a54fe848de4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -54,7 +54,8 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, static int nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, - struct nfp_fl_payload *nfp_flow, int act_len) + struct nfp_fl_payload *nfp_flow, int act_len, + struct netlink_ext_ack *extack) { size_t act_size = sizeof(struct nfp_fl_pre_lag); struct nfp_fl_pre_lag *pre_lag; @@ -65,8 +66,10 @@ nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, if (!out_dev || !netif_is_lag_master(out_dev)) return 0; - if (act_len + act_size > NFP_FL_MAX_A_SIZ) + if (act_len + act_size > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at LAG action"); return -EOPNOTSUPP; + } /* Pre_lag action must be first on action list. * If other actions already exist they need pushed forward. 
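* (any existing action_data is shifted up by the pre_lag size so the
* new entry lands at offset zero)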
@@ -76,7 +79,7 @@ nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, nfp_flow->action_data, act_len); pre_lag = (struct nfp_fl_pre_lag *)nfp_flow->action_data; - err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag); + err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag, extack); if (err) return err; @@ -93,7 +96,8 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, const struct flow_action_entry *act, struct nfp_fl_payload *nfp_flow, bool last, struct net_device *in_dev, - enum nfp_flower_tun_type tun_type, int *tun_out_cnt) + enum nfp_flower_tun_type tun_type, int *tun_out_cnt, + struct netlink_ext_ack *extack) { size_t act_size = sizeof(struct nfp_fl_output); struct nfp_flower_priv *priv = app->priv; @@ -104,18 +108,24 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, output->head.len_lw = act_size >> NFP_FL_LW_SIZ; out_dev = act->dev; - if (!out_dev) + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid egress interface for mirred action"); return -EOPNOTSUPP; + } tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0; if (tun_type) { /* Verify the egress netdev matches the tunnel type. */ - if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) + if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface does not match the required tunnel type"); return -EOPNOTSUPP; + } - if (*tun_out_cnt) + if (*tun_out_cnt) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot offload more than one tunnel mirred output per filter"); return -EOPNOTSUPP; + } (*tun_out_cnt)++; output->flags = cpu_to_be16(tmp_flags | @@ -127,8 +137,10 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, output->flags = cpu_to_be16(tmp_flags); gid = nfp_flower_lag_get_output_id(app, out_dev); - if (gid < 0) + if (gid < 0) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot find group id for LAG action"); return gid; + } output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid); } else { /* Set action output parameters. */ @@ -136,29 +148,58 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, if (nfp_netdev_is_nfp_repr(in_dev)) { /* Confirm ingress and egress are on same device. 
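* (i.e. netdev_port_same_parent_id() must report the same switchdev
* parent id for both netdevs)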
*/ - if (!netdev_port_same_parent_id(in_dev, out_dev)) + if (!netdev_port_same_parent_id(in_dev, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress and egress interfaces are on different devices"); return -EOPNOTSUPP; + } } - if (!nfp_netdev_is_nfp_repr(out_dev)) + if (!nfp_netdev_is_nfp_repr(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface is not an nfp port"); return -EOPNOTSUPP; + } output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); - if (!output->port) + if (!output->port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid port id for egress interface"); return -EOPNOTSUPP; + } } nfp_flow->meta.shortcut = output->port; return 0; } +static bool +nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx) +{ + struct flow_action_entry *act = flow->rule->action.entries; + int num_act = flow->rule->action.num_entries; + int act_idx; + + /* Preparse action list for next mirred or redirect action */ + for (act_idx = start_idx + 1; act_idx < num_act; act_idx++) + if (act[act_idx].id == FLOW_ACTION_REDIRECT || + act[act_idx].id == FLOW_ACTION_MIRRED) + return netif_is_gretap(act[act_idx].dev); + + return false; +} + static enum nfp_flower_tun_type -nfp_fl_get_tun_from_act_l4_port(struct nfp_app *app, - const struct flow_action_entry *act) +nfp_fl_get_tun_from_act(struct nfp_app *app, + struct flow_cls_offload *flow, + const struct flow_action_entry *act, int act_idx) { const struct ip_tunnel_info *tun = act->tunnel; struct nfp_flower_priv *priv = app->priv; + /* Determine the tunnel type based on the egress netdev + * in the mirred action for tunnels without l4. + */ + if (nfp_flower_tun_is_gre(flow, act_idx)) + return NFP_FL_TUNNEL_GRE; + switch (tun->key.tp_dst) { case htons(IANA_VXLAN_UDP_PORT): return NFP_FL_TUNNEL_VXLAN; @@ -194,7 +235,8 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) static int nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, - const struct flow_action_entry *act) + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) { struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel; int opt_len, opt_cnt, act_start, tot_push_len; @@ -212,20 +254,26 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, struct geneve_opt *opt = (struct geneve_opt *)src; opt_cnt++; - if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed number of geneve options exceeded"); return -EOPNOTSUPP; + } tot_push_len += sizeof(struct nfp_fl_push_geneve) + opt->length * 4; - if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); return -EOPNOTSUPP; + } opt_len -= sizeof(struct geneve_opt) + opt->length * 4; src += sizeof(struct geneve_opt) + opt->length * 4; } - if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); return -EOPNOTSUPP; + } act_start = *list_len; *list_len += tot_push_len; @@ -256,14 +304,13 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, } static int -nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, - struct nfp_fl_set_ipv4_udp_tun *set_tun, - const struct 
flow_action_entry *act, - struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type, - struct net_device *netdev) +nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, struct netlink_ext_ack *extack) { - size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); + size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun); const struct ip_tunnel_info *ip_tun = act->tunnel; struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; @@ -275,8 +322,10 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); if (ip_tun->options_len && (tun_type != NFP_FL_TUNNEL_GENEVE || - !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve options offload"); return -EOPNOTSUPP; + } set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -316,8 +365,10 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, set_tun->tos = ip_tun->key.tos; if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || - ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; + } set_tun->tun_flags = ip_tun->key.tun_flags; if (tun_type == NFP_FL_TUNNEL_GENEVE) { @@ -345,18 +396,22 @@ static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) static int nfp_fl_set_eth(const struct flow_action_entry *act, u32 off, - struct nfp_fl_set_eth *set_eth) + struct nfp_fl_set_eth *set_eth, struct netlink_ext_ack *extack) { u32 exact, mask; - if (off + 4 > ETH_ALEN * 2) + if (off + 4 > ETH_ALEN * 2) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); return -EOPNOTSUPP; + } mask = ~act->mangle.mask; exact = act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); return -EOPNOTSUPP; + } nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off], &set_eth->eth_addr_mask[off]); @@ -377,7 +432,8 @@ struct ipv4_ttl_word { static int nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ip4_addrs *set_ip_addr, - struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos) + struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos, + struct netlink_ext_ack *extack) { struct ipv4_ttl_word *ttl_word_mask; struct ipv4_ttl_word *ttl_word; @@ -389,8 +445,10 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, mask = (__force __be32)~act->mangle.mask; exact = (__force __be32)act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 action"); return -EOPNOTSUPP; + } switch (off) { case offsetof(struct iphdr, daddr): @@ -413,8 +471,10 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, ttl_word_mask = (struct ipv4_ttl_word *)&mask; ttl_word = (struct ipv4_ttl_word *)&exact; - if (ttl_word_mask->protocol || ttl_word_mask->check) + if (ttl_word_mask->protocol || ttl_word_mask->check) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 ttl action"); return -EOPNOTSUPP; + } 
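/* Fold the pedit word into the combined ttl/tos set action: record
* the masked bits, then replace the corresponding value bits.
*/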
set_ip_ttl_tos->ipv4_ttl_mask |= ttl_word_mask->ttl; set_ip_ttl_tos->ipv4_ttl &= ~ttl_word_mask->ttl; @@ -429,8 +489,10 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, tos_word = (struct iphdr *)&exact; if (tos_word_mask->version || tos_word_mask->ihl || - tos_word_mask->tot_len) + tos_word_mask->tot_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 tos action"); return -EOPNOTSUPP; + } set_ip_ttl_tos->ipv4_tos_mask |= tos_word_mask->tos; set_ip_ttl_tos->ipv4_tos &= ~tos_word_mask->tos; @@ -441,6 +503,7 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, NFP_FL_LW_SIZ; break; default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv4 header"); return -EOPNOTSUPP; } @@ -468,7 +531,8 @@ struct ipv6_hop_limit_word { static int nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, - struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl) + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) { struct ipv6_hop_limit_word *fl_hl_mask; struct ipv6_hop_limit_word *fl_hl; @@ -478,8 +542,10 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, fl_hl_mask = (struct ipv6_hop_limit_word *)&mask; fl_hl = (struct ipv6_hop_limit_word *)&exact; - if (fl_hl_mask->nexthdr || fl_hl_mask->payload_len) + if (fl_hl_mask->nexthdr || fl_hl_mask->payload_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 hop limit action"); return -EOPNOTSUPP; + } ip_hl_fl->ipv6_hop_limit_mask |= fl_hl_mask->hop_limit; ip_hl_fl->ipv6_hop_limit &= ~fl_hl_mask->hop_limit; @@ -488,8 +554,10 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, break; case round_down(offsetof(struct ipv6hdr, flow_lbl), 4): if (mask & ~IPV6_FLOW_LABEL_MASK || - exact & ~IPV6_FLOW_LABEL_MASK) + exact & ~IPV6_FLOW_LABEL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow label action"); return -EOPNOTSUPP; + } ip_hl_fl->ipv6_label_mask |= mask; ip_hl_fl->ipv6_label &= ~mask; @@ -507,7 +575,8 @@ static int nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ipv6_addr *ip_dst, struct nfp_fl_set_ipv6_addr *ip_src, - struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl) + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) { __be32 exact, mask; int err = 0; @@ -517,12 +586,14 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, mask = (__force __be32)~act->mangle.mask; exact = (__force __be32)act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 action"); return -EOPNOTSUPP; + } if (off < offsetof(struct ipv6hdr, saddr)) { err = nfp_fl_set_ip6_hop_limit_flow_label(off, exact, mask, - ip_hl_fl); + ip_hl_fl, extack); } else if (off < offsetof(struct ipv6hdr, daddr)) { word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact); nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word, @@ -533,6 +604,7 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word, exact, mask, ip_dst); } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv6 header"); return -EOPNOTSUPP; } @@ -541,18 +613,23 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, static int nfp_fl_set_tport(const struct flow_action_entry *act, u32 off, - struct nfp_fl_set_tport *set_tport, int opcode) + struct nfp_fl_set_tport 
*set_tport, int opcode, + struct netlink_ext_ack *extack) { u32 exact, mask; - if (off) + if (off) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of L4 header"); return -EOPNOTSUPP; + } mask = ~act->mangle.mask; exact = act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit L4 action"); return -EOPNOTSUPP; + } nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val, set_tport->tp_port_mask); @@ -592,11 +669,11 @@ struct nfp_flower_pedit_acts { }; static int -nfp_fl_commit_mangle(struct tc_cls_flower_offload *flow, char *nfp_action, +nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action, int *a_len, struct nfp_flower_pedit_acts *set_act, u32 *csum_updated) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); size_t act_size = 0; u8 ip_proto = 0; @@ -694,8 +771,9 @@ nfp_fl_commit_mangle(struct tc_cls_flower_offload *flow, char *nfp_action, static int nfp_fl_pedit(const struct flow_action_entry *act, - struct tc_cls_flower_offload *flow, char *nfp_action, int *a_len, - u32 *csum_updated, struct nfp_flower_pedit_acts *set_act) + struct flow_cls_offload *flow, char *nfp_action, int *a_len, + u32 *csum_updated, struct nfp_flower_pedit_acts *set_act, + struct netlink_ext_ack *extack) { enum flow_action_mangle_base htype; u32 offset; @@ -705,21 +783,22 @@ nfp_fl_pedit(const struct flow_action_entry *act, switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - return nfp_fl_set_eth(act, offset, &set_act->set_eth); + return nfp_fl_set_eth(act, offset, &set_act->set_eth, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: return nfp_fl_set_ip4(act, offset, &set_act->set_ip_addr, - &set_act->set_ip_ttl_tos); + &set_act->set_ip_ttl_tos, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: return nfp_fl_set_ip6(act, offset, &set_act->set_ip6_dst, &set_act->set_ip6_src, - &set_act->set_ip6_tc_hl_fl); + &set_act->set_ip6_tc_hl_fl, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: return nfp_fl_set_tport(act, offset, &set_act->set_tport, - NFP_FL_ACTION_OPCODE_SET_TCP); + NFP_FL_ACTION_OPCODE_SET_TCP, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: return nfp_fl_set_tport(act, offset, &set_act->set_tport, - NFP_FL_ACTION_OPCODE_SET_UDP); + NFP_FL_ACTION_OPCODE_SET_UDP, extack); default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported header"); return -EOPNOTSUPP; } } @@ -730,7 +809,8 @@ nfp_flower_output_action(struct nfp_app *app, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, bool last, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, - int *out_cnt, u32 *csum_updated) + int *out_cnt, u32 *csum_updated, + struct netlink_ext_ack *extack) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_output *output; @@ -739,15 +819,19 @@ nfp_flower_output_action(struct nfp_app *app, /* If csum_updated has not been reset by now, it means HW will * incorrectly update csums when they are not requested. 
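* Each set/pedit action records the checksums the firmware will
* rewrite in *csum_updated, and an explicit csum action acknowledges
* and clears those bits; a non-zero value at output time therefore
* means e.g. an IPv4 rewrite was requested without a matching csum
* update action.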
*/ - if (*csum_updated) + if (*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: set actions without updating checksums are not supported"); return -EOPNOTSUPP; + } - if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: mirred output increases action list size beyond the allowed maximum"); return -EOPNOTSUPP; + } output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; err = nfp_fl_output(app, output, act, nfp_fl, last, netdev, *tun_type, - tun_out_cnt); + tun_out_cnt, extack); if (err) return err; @@ -757,11 +841,13 @@ nfp_flower_output_action(struct nfp_app *app, /* nfp_fl_pre_lag returns -err or size of prelag action added. * This will be 0 if it is not egressing to a lag dev. */ - prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len); - if (prelag_size < 0) + prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len, extack); + if (prelag_size < 0) { return prelag_size; - else if (prelag_size > 0 && (!last || *out_cnt)) + } else if (prelag_size > 0 && (!last || *out_cnt)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: LAG action has to be last action in action list"); return -EOPNOTSUPP; + } *a_len += prelag_size; } @@ -772,14 +858,15 @@ nfp_flower_output_action(struct nfp_app *app, static int nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, int *out_cnt, u32 *csum_updated, - struct nfp_flower_pedit_acts *set_act) + struct nfp_flower_pedit_acts *set_act, + struct netlink_ext_ack *extack, int act_idx) { - struct nfp_fl_set_ipv4_udp_tun *set_tun; + struct nfp_fl_set_ipv4_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_pop_vlan *pop_v; @@ -792,20 +879,23 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, case FLOW_ACTION_REDIRECT: err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, true, tun_type, tun_out_cnt, - out_cnt, csum_updated); + out_cnt, csum_updated, extack); if (err) return err; break; case FLOW_ACTION_MIRRED: err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, false, tun_type, tun_out_cnt, - out_cnt, csum_updated); + out_cnt, csum_updated, extack); if (err) return err; break; case FLOW_ACTION_VLAN_POP: - if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) + if (*a_len + + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop vlan"); return -EOPNOTSUPP; + } pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len]; nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV); @@ -814,8 +904,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, *a_len += sizeof(struct nfp_fl_pop_vlan); break; case FLOW_ACTION_VLAN_PUSH: - if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) + if (*a_len + + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push vlan"); return -EOPNOTSUPP; + } psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len]; nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); @@ -826,35 +919,41 @@ nfp_flower_loop_action(struct nfp_app 
*app, const struct flow_action_entry *act, case FLOW_ACTION_TUNNEL_ENCAP: { const struct ip_tunnel_info *ip_tun = act->tunnel; - *tun_type = nfp_fl_get_tun_from_act_l4_port(app, act); - if (*tun_type == NFP_FL_TUNNEL_NONE) + *tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx); + if (*tun_type == NFP_FL_TUNNEL_NONE) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list"); return -EOPNOTSUPP; + } - if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) + if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel flags in action list"); return -EOPNOTSUPP; + } /* Pre-tunnel action is required for tunnel encap. * This checks for next hop entries on NFP. * If none, the packet falls back before applying other actions. */ if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + - sizeof(struct nfp_fl_set_ipv4_udp_tun) > NFP_FL_MAX_A_SIZ) + sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); return -EOPNOTSUPP; + } pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len); nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); *a_len += sizeof(struct nfp_fl_pre_tunnel); - err = nfp_fl_push_geneve_options(nfp_fl, a_len, act); + err = nfp_fl_push_geneve_options(nfp_fl, a_len, act, extack); if (err) return err; set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(app, set_tun, act, pre_tun, - *tun_type, netdev); + err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun, + *tun_type, netdev, extack); if (err) return err; - *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); + *a_len += sizeof(struct nfp_fl_set_ipv4_tun); } break; case FLOW_ACTION_TUNNEL_DECAP: @@ -862,13 +961,15 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, return 0; case FLOW_ACTION_MANGLE: if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len], - a_len, csum_updated, set_act)) + a_len, csum_updated, set_act, extack)) return -EOPNOTSUPP; break; case FLOW_ACTION_CSUM: /* csum action requests recalc of something we have not fixed */ - if (act->csum_flags & ~*csum_updated) + if (act->csum_flags & ~*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported csum update action in action list"); return -EOPNOTSUPP; + } /* If we will correctly fix the csum we can remove it from the * csum update list. Which will later be used to check support. */ @@ -876,6 +977,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, break; default: /* Currently we do not handle any other actions. 
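* They are rejected with -EOPNOTSUPP so the flow is not offloaded.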
*/ + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list"); return -EOPNOTSUPP; } @@ -919,9 +1021,10 @@ static bool nfp_fl_check_mangle_end(struct flow_action *flow_act, } int nfp_flower_compile_action(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct net_device *netdev, - struct nfp_fl_payload *nfp_flow) + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack) { int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; struct nfp_flower_pedit_acts set_act; @@ -942,7 +1045,8 @@ int nfp_flower_compile_action(struct nfp_app *app, memset(&set_act, 0, sizeof(set_act)); err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, - &out_cnt, &csum_updated, &set_act); + &out_cnt, &csum_updated, + &set_act, extack, i); if (err) return err; act_cnt++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 537f7fc19584..0f1706ae5bfc 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -8,6 +8,7 @@ #include <linux/skbuff.h> #include <linux/types.h> #include <net/geneve.h> +#include <net/gre.h> #include <net/vxlan.h> #include "../nfp_app.h" @@ -22,6 +23,7 @@ #define NFP_FLOWER_LAYER_CT BIT(6) #define NFP_FLOWER_LAYER_VXLAN BIT(7) +#define NFP_FLOWER_LAYER2_GRE BIT(0) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) @@ -37,6 +39,9 @@ #define NFP_FL_IP_FRAG_FIRST BIT(7) #define NFP_FL_IP_FRAGMENTED BIT(6) +/* GRE Tunnel flags */ +#define NFP_FL_GRE_FLAG_KEY BIT(2) + /* Compressed HW representation of TCP Flags */ #define NFP_FL_TCP_FLAG_URG BIT(4) #define NFP_FL_TCP_FLAG_PSH BIT(3) @@ -107,6 +112,7 @@ enum nfp_flower_tun_type { NFP_FL_TUNNEL_NONE = 0, + NFP_FL_TUNNEL_GRE = 1, NFP_FL_TUNNEL_VXLAN = 2, NFP_FL_TUNNEL_GENEVE = 4, }; @@ -203,7 +209,7 @@ struct nfp_fl_pre_tunnel { __be32 extra[3]; }; -struct nfp_fl_set_ipv4_udp_tun { +struct nfp_fl_set_ipv4_tun { struct nfp_fl_act_head head; __be16 reserved; __be64 tun_id __packed; @@ -354,6 +360,16 @@ struct nfp_flower_ipv6 { struct in6_addr ipv6_dst; }; +struct nfp_flower_tun_ipv4 { + __be32 src; + __be32 dst; +}; + +struct nfp_flower_tun_ip_ext { + u8 tos; + u8 ttl; +}; + /* Flow Frame IPv4 UDP TUNNEL --> Tunnel details (4W/16B) * ----------------------------------------------------------------- * 3 2 1 @@ -371,15 +387,42 @@ struct nfp_flower_ipv6 { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct nfp_flower_ipv4_udp_tun { - __be32 ip_src; - __be32 ip_dst; + struct nfp_flower_tun_ipv4 ipv4; __be16 reserved1; - u8 tos; - u8 ttl; + struct nfp_flower_tun_ip_ext ip_ext; __be32 reserved2; __be32 tun_id; }; +/* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +struct nfp_flower_ipv4_gre_tun { + struct nfp_flower_tun_ipv4 ipv4; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + struct nfp_flower_geneve_options { u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; }; @@ -530,6 +573,8 @@ nfp_fl_netdev_is_tunnel_type(struct net_device *netdev, { if (netif_is_vxlan(netdev)) return tun_type == NFP_FL_TUNNEL_VXLAN; + if (netif_is_gretap(netdev)) + return tun_type == NFP_FL_TUNNEL_GRE; if (netif_is_geneve(netdev)) return tun_type == NFP_FL_TUNNEL_GENEVE; @@ -546,6 +591,8 @@ static inline bool nfp_fl_is_netdev_to_offload(struct net_device *netdev) return true; if (netif_is_geneve(netdev)) return true; + if (netif_is_gretap(netdev)) + return true; return false; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index 5db838f45694..63907aeb3884 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -156,7 +156,8 @@ nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag, int nfp_flower_lag_populate_pre_action(struct nfp_app *app, struct net_device *master, - struct nfp_fl_pre_lag *pre_act) + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_lag_group *group = NULL; @@ -167,6 +168,7 @@ int nfp_flower_lag_populate_pre_action(struct nfp_app *app, master); if (!group) { mutex_unlock(&priv->nfp_lag.lock); + NL_SET_ERR_MSG_MOD(extack, "invalid entry: group does not exist for LAG action"); return -ENOENT; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 40957a8dbfe6..af9441d5787f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -343,19 +343,22 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2); int nfp_flower_compile_flow_match(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, - enum nfp_flower_tun_type tun_type); + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); int nfp_flower_compile_action(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct net_device *netdev, - struct nfp_fl_payload *nfp_flow); + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack); int nfp_compile_flow_metadata(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_payload *nfp_flow, - struct net_device *netdev); + struct net_device *netdev, + struct netlink_ext_ack *extack); void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv, struct nfp_fl_payload *nfp_flow); int nfp_modify_flow_metadata(struct nfp_app *app, @@ -389,7 +392,8 @@ int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv, bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb); int nfp_flower_lag_populate_pre_action(struct nfp_app *app, struct net_device *master, - struct nfp_fl_pre_lag *pre_act); + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack); int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device 
*master); void nfp_flower_qos_init(struct nfp_app *app); diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index bfa4bf34911d..9cc3ba17ff69 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -10,9 +10,9 @@ static void nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, struct nfp_flower_meta_tci *msk, - struct tc_cls_flower_offload *flow, u8 key_type) + struct flow_cls_offload *flow, u8 key_type) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u16 tmp_tci; memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); @@ -54,7 +54,8 @@ nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext) static int nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, - bool mask_version, enum nfp_flower_tun_type tun_type) + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) { if (mask_version) { frame->in_port = cpu_to_be32(~0); @@ -64,8 +65,10 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, if (tun_type) { frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); } else { - if (!cmsg_port) + if (!cmsg_port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid ingress interface for match offload"); return -EOPNOTSUPP; + } frame->in_port = cpu_to_be32(cmsg_port); } @@ -75,9 +78,9 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, static void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, struct nfp_flower_mac_mpls *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); @@ -127,9 +130,9 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, static void nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); @@ -148,9 +151,9 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, static void nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, struct nfp_flower_ip_ext *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -222,9 +225,9 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, static void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, struct nfp_flower_ipv4 *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_match_ipv4_addrs match; memset(ext, 0, sizeof(struct nfp_flower_ipv4)); @@ -244,9 +247,9 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, static void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, struct 
nfp_flower_ipv6 *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_ipv6)); memset(msk, 0, sizeof(struct nfp_flower_ipv6)); @@ -266,7 +269,7 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, static int nfp_flower_compile_geneve_opt(void *ext, void *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { struct flow_match_enc_opts match; @@ -278,11 +281,76 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk, } static void +nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, + struct nfp_flower_tun_ipv4 *msk, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + ext->src = match.key->src; + ext->dst = match.key->dst; + msk->src = match.mask->src; + msk->dst = match.mask->dst; + } +} + +static void +nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, + struct nfp_flower_tun_ip_ext *msk, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + ext->tos = match.key->tos; + ext->ttl = match.key->ttl; + msk->tos = match.mask->tos; + msk->ttl = match.mask->ttl; + } +} + +static void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + + memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + + flow_rule_match_enc_keyid(rule, &match); + ext->tun_key = match.key->keyid; + msk->tun_key = match.mask->keyid; + + ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + } + + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); +} + +static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); @@ -298,33 +366,17 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, msk->tun_id = cpu_to_be32(temp_vni); } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { - struct flow_match_ipv4_addrs match; - - flow_rule_match_enc_ipv4_addrs(rule, &match); - ext->ip_src = match.key->src; - ext->ip_dst = match.key->dst; - msk->ip_src = match.mask->src; - msk->ip_dst = match.mask->dst; - } - - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_match_ip match; - - flow_rule_match_enc_ip(rule, &match); - ext->tos 
= match.key->tos; - ext->ttl = match.key->ttl; - msk->tos = match.mask->tos; - msk->ttl = match.mask->ttl; - } + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); } int nfp_flower_compile_flow_match(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, - enum nfp_flower_tun_type tun_type) + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) { u32 port_id; int err; @@ -357,13 +409,13 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, /* Populate Exact Port data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, - port_id, false, tun_type); + port_id, false, tun_type, extack); if (err) return err; /* Populate Mask Port Data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, - port_id, true, tun_type); + port_id, true, tun_type, extack); if (err) return err; @@ -402,12 +454,27 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, msk += sizeof(struct nfp_flower_ipv6); } + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { + __be32 tun_dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow); + tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = tun_dst; + nfp_tunnel_add_ipv4_off(app, tun_dst); + } + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { __be32 tun_dst; nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ip_dst; + tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; ext += sizeof(struct nfp_flower_ipv4_udp_tun); msk += sizeof(struct nfp_flower_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 3d326efdc814..7c4a15e967df 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -290,9 +290,10 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, } int nfp_compile_flow_metadata(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_payload *nfp_flow, - struct net_device *netdev) + struct net_device *netdev, + struct netlink_ext_ack *extack) { struct nfp_fl_stats_ctx_to_flow *ctx_entry; struct nfp_flower_priv *priv = app->priv; @@ -302,8 +303,10 @@ int nfp_compile_flow_metadata(struct nfp_app *app, int err; err = nfp_get_stats_entry(app, &stats_cxt); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate new stats context"); return err; + } nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt); nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie); @@ -328,6 +331,12 @@ int nfp_compile_flow_metadata(struct nfp_app *app, if (!nfp_check_mask_add(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, &nfp_flow->meta.flags, &new_mask_id)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id"); + if (nfp_release_stats_entry(app, stats_cxt)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context"); + err = -EINVAL; + goto err_remove_rhash; + 
} err = -ENOENT; goto err_remove_rhash; } @@ -343,6 +352,21 @@ int nfp_compile_flow_metadata(struct nfp_app *app, check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev); if (check_entry) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry"); + if (nfp_release_stats_entry(app, stats_cxt)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context"); + err = -EINVAL; + goto err_remove_mask; + } + + if (!nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + NULL, &new_mask_id)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release mask id"); + err = -EINVAL; + goto err_remove_mask; + } + err = -EEXIST; goto err_remove_mask; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 1fbfeb43c538..7e725fa60347 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -52,8 +52,7 @@ #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) #define NFP_FLOWER_MERGE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ @@ -122,9 +121,9 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, return 0; } -static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) +static bool nfp_flower_check_higher_than_mac(struct flow_cls_offload *f) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) || @@ -132,14 +131,25 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); } +static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); +} + static int -nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts, - u32 *key_layer_two, int *key_size) +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size, + struct netlink_ext_ack *extack) { - if (enc_opts->key->len > NFP_FL_MAX_GENEVE_OPT_KEY) + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); return -EOPNOTSUPP; + } - if (enc_opts->key->len > 0) { + if (enc_opts->len > 0) { *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; *key_size += sizeof(struct nfp_flower_geneve_options); } @@ -148,13 +158,65 @@ nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts, } static int +nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, + struct flow_dissector_key_enc_opts *enc_op, + u32 *key_layer_two, u8 *key_layer, int *key_size, + struct nfp_flower_priv *priv, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack) +{ + int err; + + switch (enc_ports->dst) { + case htons(IANA_VXLAN_UDP_PORT): + *tun_type = NFP_FL_TUNNEL_VXLAN; + *key_layer |= NFP_FLOWER_LAYER_VXLAN; + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (enc_op) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan 
tunnels"); + return -EOPNOTSUPP; + } + break; + case htons(GENEVE_UDP_PORT): + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve offload"); + return -EOPNOTSUPP; + } + *tun_type = NFP_FL_TUNNEL_GENEVE; + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); + return -EOPNOTSUPP; + } + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, + key_size, extack); + if (err) + return err; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel type unknown"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int nfp_flower_calculate_key_layers(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_key_ls *ret_key_ls, - struct tc_cls_flower_offload *flow, - enum nfp_flower_tun_type *tun_type) + struct flow_cls_offload *flow, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_dissector *dissector = rule->match.dissector; struct flow_match_basic basic = { NULL, NULL}; struct nfp_flower_priv *priv = app->priv; @@ -163,14 +225,18 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, int key_size; int err; - if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) + if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match not supported"); return -EOPNOTSUPP; + } /* If any tun dissector is used then the required set must be used. */ if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) - != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); return -EOPNOTSUPP; + } key_layer_two = 0; key_layer = NFP_FLOWER_LAYER_PORT; @@ -188,8 +254,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_vlan(rule, &vlan); if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) && - vlan.key->vlan_priority) + vlan.key->vlan_priority) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN PCP offload"); return -EOPNOTSUPP; + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { @@ -200,56 +268,68 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_enc_control(rule, &enc_ctl); - if (enc_ctl.mask->addr_type != 0xffff || - enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) + if (enc_ctl.mask->addr_type != 0xffff) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); + return -EOPNOTSUPP; + } + if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported"); return -EOPNOTSUPP; + } /* These fields are already verified as used. 
*/ flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); - if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) - return -EOPNOTSUPP; - - flow_rule_match_enc_ports(rule, &enc_ports); - if (enc_ports.mask->dst != cpu_to_be16(~0)) + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); return -EOPNOTSUPP; + } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) flow_rule_match_enc_opts(rule, &enc_op); - switch (enc_ports.key->dst) { - case htons(IANA_VXLAN_UDP_PORT): - *tun_type = NFP_FL_TUNNEL_VXLAN; - key_layer |= NFP_FLOWER_LAYER_VXLAN; - key_size += sizeof(struct nfp_flower_ipv4_udp_tun); - if (enc_op.key) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + /* check if GRE, which has no enc_ports */ + if (netif_is_gretap(netdev)) { + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + key_size += + sizeof(struct nfp_flower_ipv4_gre_tun); + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); + return -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); return -EOPNOTSUPP; - break; - case htons(GENEVE_UDP_PORT): - if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) + } + } else { + flow_rule_match_enc_ports(rule, &enc_ports); + if (enc_ports.mask->dst != cpu_to_be16(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match L4 destination port is supported"); return -EOPNOTSUPP; - *tun_type = NFP_FL_TUNNEL_GENEVE; - key_layer |= NFP_FLOWER_LAYER_EXT_META; - key_size += sizeof(struct nfp_flower_ext_meta); - key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; - key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } - if (!enc_op.key) - break; - if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) - return -EOPNOTSUPP; - err = nfp_flower_calc_opt_layer(&enc_op, &key_layer_two, - &key_size); + err = nfp_flower_calc_udp_tun_layer(enc_ports.key, + enc_op.key, + &key_layer_two, + &key_layer, + &key_size, priv, + tun_type, extack); if (err) return err; - break; - default: - return -EOPNOTSUPP; - } - /* Ensure the ingress netdev matches the expected tun type. */ - if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) - return -EOPNOTSUPP; + /* Ensure the ingress netdev matches the expected + * tun type. + */ + if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress netdev does not match the expected tunnel type"); + return -EOPNOTSUPP; + } + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) @@ -272,6 +352,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, * because we rely on it to get to the host. */ case cpu_to_be16(ETH_P_ARP): + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ARP not supported"); return -EOPNOTSUPP; case cpu_to_be16(ETH_P_MPLS_UC): @@ -290,14 +371,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* Other ethtype - we need to check the masks for the * remainder of the key to ensure we can offload. 
*/ - if (nfp_flower_check_higher_than_mac(flow)) + if (nfp_flower_check_higher_than_mac(flow)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: non IPv4/IPv6 offload with L3/L4 matches not supported"); return -EOPNOTSUPP; + } break; } } if (basic.mask && basic.mask->ip_proto) { - /* Ethernet type is present in the key. */ switch (basic.key->ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -311,7 +393,11 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* Other ip proto - we need to check the masks for the * remainder of the key to ensure we can offload. */ - return -EOPNOTSUPP; + if (nfp_flower_check_higher_than_l3(flow)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unknown IP protocol with L4 matches not supported"); + return -EOPNOTSUPP; + } + break; } } @@ -322,22 +408,28 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_tcp(rule, &tcp); tcp_flags = be16_to_cpu(tcp.key->flags); - if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) + if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: no match support for selected TCP flags"); return -EOPNOTSUPP; + } /* We only support PSH and URG flags when either * FIN, SYN or RST is present as well. */ if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) && - !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) + !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: PSH and URG is only supported when used with FIN, SYN or RST"); return -EOPNOTSUPP; + } /* We need to store TCP flags in either the IPv4 or IPv6 key * space, thus we need to ensure we include an IPv4/IPv6 key * layer if we have not done so already. */ - if (!basic.key) + if (!basic.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on L3 protocol"); return -EOPNOTSUPP; + } if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && !(key_layer & NFP_FLOWER_LAYER_IPV6)) { @@ -353,6 +445,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, break; default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on IPv4/IPv6"); return -EOPNOTSUPP; } } @@ -362,8 +455,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, struct flow_match_control ctl; flow_rule_match_control(rule, &ctl); - if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) + if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on unknown control flag"); return -EOPNOTSUPP; + } } ret_key_ls->key_layer = key_layer; @@ -771,14 +866,16 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2) { - struct tc_cls_flower_offload merge_tc_off; + struct flow_cls_offload merge_tc_off; struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *merge_flow; struct nfp_fl_key_ls merge_key_ls; int err; ASSERT_RTNL(); + extack = merge_tc_off.common.extack; if (sub_flow1 == sub_flow2 || nfp_flower_is_merge_flow(sub_flow1) || nfp_flower_is_merge_flow(sub_flow2)) @@ -816,7 +913,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, merge_tc_off.cookie = merge_flow->tc_flower_cookie; err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow, - merge_flow->ingress_dev); + merge_flow->ingress_dev, extack); if (err) goto err_unlink_sub_flow2; @@ -865,15 +962,17 @@ err_destroy_merge_flow: */ static int nfp_flower_add_offload(struct nfp_app *app, struct net_device 
*netdev, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *flow_pay; struct nfp_fl_key_ls *key_layer; struct nfp_port *port = NULL; int err; + extack = flow->common.extack; if (nfp_netdev_is_nfp_repr(netdev)) port = nfp_port_from_netdev(netdev); @@ -882,7 +981,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, return -ENOMEM; err = nfp_flower_calculate_key_layers(app, netdev, key_layer, flow, - &tun_type); + &tun_type, extack); if (err) goto err_free_key_ls; @@ -893,23 +992,25 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, } err = nfp_flower_compile_flow_match(app, flow, key_layer, netdev, - flow_pay, tun_type); + flow_pay, tun_type, extack); if (err) goto err_destroy_flow; - err = nfp_flower_compile_action(app, flow, netdev, flow_pay); + err = nfp_flower_compile_action(app, flow, netdev, flow_pay, extack); if (err) goto err_destroy_flow; - err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev); + err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev, extack); if (err) goto err_destroy_flow; flow_pay->tc_flower_cookie = flow->cookie; err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, nfp_flower_table_params); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot insert flow into tables for offloads"); goto err_release_metadata; + } err = nfp_flower_xmit_flow(app, flow_pay, NFP_FLOWER_CMSG_TYPE_FLOW_ADD); @@ -1024,19 +1125,23 @@ nfp_flower_del_linked_merge_flows(struct nfp_app *app, */ static int nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *nfp_flow; struct nfp_port *port = NULL; int err; + extack = flow->common.extack; if (nfp_netdev_is_nfp_repr(netdev)) port = nfp_port_from_netdev(netdev); nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); - if (!nfp_flow) + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot remove flow that does not exist"); return -ENOENT; + } err = nfp_modify_flow_metadata(app, nfp_flow); if (err) @@ -1127,15 +1232,19 @@ nfp_flower_update_merge_stats(struct nfp_app *app, */ static int nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *nfp_flow; u32 ctx_id; + extack = flow->common.extack; nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); - if (!nfp_flow) + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot dump stats for flow that does not exist"); return -EINVAL; + } ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); @@ -1156,17 +1265,17 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, static int nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flower) + struct flow_cls_offload *flower) { if (!eth_proto_is_802_3(flower->common.protocol)) return -EOPNOTSUPP; switch (flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return nfp_flower_add_offload(app, netdev, flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return 
nfp_flower_del_offload(app, netdev, flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return nfp_flower_get_stats(app, netdev, flower); default: return -EOPNOTSUPP; @@ -1193,27 +1302,45 @@ static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, } } +static LIST_HEAD(nfp_block_cb_list); + static int nfp_flower_setup_tc_block(struct net_device *netdev, - struct tc_block_offload *f) + struct flow_block_offload *f) { struct nfp_repr *repr = netdev_priv(netdev); struct nfp_flower_repr_priv *repr_priv; + struct flow_block_cb *block_cb; - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; repr_priv = repr->app_priv; - repr_priv->block_shared = tcf_block_shared(f->block); + repr_priv->block_shared = f->block_shared; + f->driver_block_list = &nfp_block_cb_list; switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - nfp_flower_setup_tc_block_cb, - repr, repr, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, - nfp_flower_setup_tc_block_cb, - repr); + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(nfp_flower_setup_tc_block_cb, repr, + &nfp_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(f->net, + nfp_flower_setup_tc_block_cb, + repr, repr, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f, nfp_flower_setup_tc_block_cb, + repr); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; @@ -1258,7 +1385,7 @@ static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct nfp_flower_indr_block_cb_priv *priv = cb_priv; - struct tc_cls_flower_offload *flower = type_data; + struct flow_cls_offload *flower = type_data; if (flower->common.chain_index) return -EOPNOTSUPP; @@ -1272,21 +1399,29 @@ static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, } } +static void nfp_flower_setup_indr_tc_release(void *cb_priv) +{ + struct nfp_flower_indr_block_cb_priv *priv = cb_priv; + + list_del(&priv->list); + kfree(priv); +} + static int nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, - struct tc_block_offload *f) + struct flow_block_offload *f) { struct nfp_flower_indr_block_cb_priv *cb_priv; struct nfp_flower_priv *priv = app->priv; - int err; + struct flow_block_cb *block_cb; - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS && - !(f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + !(f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && nfp_flower_internal_port_can_offload(app, netdev))) return -EOPNOTSUPP; switch (f->command) { - case TC_BLOCK_BIND: + case FLOW_BLOCK_BIND: cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL); if (!cb_priv) return -ENOMEM; @@ -1295,26 +1430,32 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, cb_priv->app = app; list_add(&cb_priv->list, &priv->indr_block_cb_priv); - err = tcf_block_cb_register(f->block, - nfp_flower_setup_indr_block_cb, - cb_priv, cb_priv, f->extack); - if (err) { + block_cb = flow_block_cb_alloc(f->net, + nfp_flower_setup_indr_block_cb, + cb_priv, cb_priv, + nfp_flower_setup_indr_tc_release); + if (IS_ERR(block_cb)) { 
list_del(&cb_priv->list); kfree(cb_priv); + return PTR_ERR(block_cb); } - return err; - case TC_BLOCK_UNBIND: + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); if (!cb_priv) return -ENOENT; - tcf_block_cb_unregister(f->block, - nfp_flower_setup_indr_block_cb, - cb_priv); - list_del(&cb_priv->list); - kfree(cb_priv); + block_cb = flow_block_cb_lookup(f, + nfp_flower_setup_indr_block_cb, + cb_priv); + if (!block_cb) + return -ENOENT; + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 8c67505865a4..a7a80f4b722a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -162,8 +162,7 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) } pay_len = nfp_flower_cmsg_get_data_len(skb); - if (pay_len != sizeof(struct nfp_tun_active_tuns) + - sizeof(struct route_ip_info) * count) { + if (pay_len != struct_size(payload, tun_info, count)) { nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); return; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 948d1a4b4643..60e57f08de80 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -596,6 +596,10 @@ static int nfp_pci_probe(struct pci_dev *pdev, struct nfp_pf *pf; int err; + if (pdev->vendor == PCI_VENDOR_ID_NETRONOME && + pdev->device == PCI_DEVICE_ID_NETRONOME_NFP6000_VF) + dev_warn(&pdev->dev, "Binding NFP VF device to the NFP PF driver, the VF driver is called 'nfp_netvf'\n"); + err = pci_enable_device(pdev); if (err < 0) return err; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df9aff2684ed..5d6c3738b494 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -12,11 +12,14 @@ #ifndef _NFP_NET_H_ #define _NFP_NET_H_ +#include <linux/atomic.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/semaphore.h> +#include <linux/workqueue.h> #include <net/xdp.h> #include "nfp_net_ctrl.h" @@ -238,7 +241,7 @@ struct nfp_net_tx_ring { #define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) #define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) #define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) -#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_DECRYPTED cpu_to_le16(BIT(8)) #define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) #define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) #define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) @@ -365,6 +368,7 @@ struct nfp_net_rx_ring { * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK * @hw_csum_rx_complete: Counter of packets with CHECKSUM_COMPLETE reported * @hw_csum_rx_error: Counter of packets with bad checksums + * @hw_tls_rx: Number of packets with TLS decrypted by hardware * @tx_sync: Seqlock for atomic updates of TX stats * @tx_pkts: Number of Transmitted packets * @tx_bytes: Number of Transmitted bytes @@ -372,6 +376,11 @@ struct nfp_net_rx_ring { * @hw_csum_tx_inner: Counter of inner 
TX checksum offload requests * @tx_gather: Counter of packets with Gather DMA * @tx_lso: Counter of LSO packets sent + * @hw_tls_tx: Counter of TLS packets sent with crypto offloaded to HW + * @tls_tx_fallback: Counter of TLS packets sent which had to be encrypted + * by the fallback path because packets came out of order + * @tls_tx_no_fallback: Counter of TLS packets not sent because the fallback + * path could not encrypt them * @tx_errors: How many TX errors were encountered * @tx_busy: How often was TX busy (no space)? * @rx_replace_buf_alloc_fail: Counter of RX buffer allocation failures @@ -392,7 +401,7 @@ struct nfp_net_r_vector { struct { struct tasklet_struct tasklet; struct sk_buff_head queue; - struct spinlock lock; + spinlock_t lock; }; }; @@ -408,22 +417,30 @@ struct nfp_net_r_vector { u64 hw_csum_rx_ok; u64 hw_csum_rx_inner_ok; u64 hw_csum_rx_complete; + u64 hw_tls_rx; + + u64 hw_csum_rx_error; + u64 rx_replace_buf_alloc_fail; struct nfp_net_tx_ring *xdp_ring; struct u64_stats_sync tx_sync; u64 tx_pkts; u64 tx_bytes; - u64 hw_csum_tx; + + u64 ____cacheline_aligned_in_smp hw_csum_tx; u64 hw_csum_tx_inner; u64 tx_gather; u64 tx_lso; + u64 hw_tls_tx; - u64 hw_csum_rx_error; - u64 rx_replace_buf_alloc_fail; + u64 tls_tx_fallback; + u64 tls_tx_no_fallback; u64 tx_errors; u64 tx_busy; + /* Cold data follows */ + u32 irq_vector; irq_handler_t handler; char name[IFNAMSIZ + 8]; @@ -458,6 +475,7 @@ struct nfp_stat_pair { * @netdev: Backpointer to net_device structure * @is_vf: Is the driver attached to a VF? * @chained_metadata_format: Firmware will use new metadata format + * @ktls_tx: Is kTLS TX enabled? * @rx_dma_dir: Mapping direction for RX buffers * @rx_dma_off: Offset at which DMA packets (for XDP headroom) * @rx_offset: Offset in the RX buffers where packet data starts @@ -482,6 +500,7 @@ struct nfp_net_dp { u8 is_vf:1; u8 chained_metadata_format:1; + u8 ktls_tx:1; u8 rx_dma_dir; u8 rx_offset; @@ -549,7 +568,7 @@ struct nfp_net_dp { * @reconfig_timer: Timer for async reading of reconfig results * @reconfig_in_progress_update: Update FW is processing now (debug only) * @bar_lock: vNIC config BAR access lock, protects: update, - * mailbox area + * mailbox area, crypto TLV * @link_up: Is the link up? 
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter @@ -562,6 +581,18 @@ struct nfp_net_dp { * @tx_bar: Pointer to mapped TX queues * @rx_bar: Pointer to mapped FL/RX queues * @tlv_caps: Parsed TLV capabilities + * @ktls_tx_conn_cnt: Number of offloaded kTLS TX connections + * @ktls_rx_conn_cnt: Number of offloaded kTLS RX connections + * @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX) + * @ktls_no_space: Counter of firmware rejecting kTLS connection due to + * lack of space + * @mbox_cmsg: Common Control Message via vNIC mailbox state + * @mbox_cmsg.queue: CCM mbox queue of pending messages + * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes + * @mbox_cmsg.workq: CCM mbox work queue for @wait_work and @runq_work + * @mbox_cmsg.wait_work: CCM mbox posted msg reconfig wait work + * @mbox_cmsg.runq_work: CCM mbox posted msg queue runner work + * @mbox_cmsg.tag: CCM mbox message tag allocator * @debugfs_dir: Device directory in debugfs * @vnic_list: Entry on device vNIC list * @pdev: Backpointer to PCI device @@ -620,7 +651,7 @@ struct nfp_net { struct timer_list reconfig_timer; u32 reconfig_in_progress_update; - struct mutex bar_lock; + struct semaphore bar_lock; u32 rx_coalesce_usecs; u32 rx_coalesce_max_frames; @@ -637,6 +668,22 @@ struct nfp_net { struct nfp_net_tlv_caps tlv_caps; + unsigned int ktls_tx_conn_cnt; + unsigned int ktls_rx_conn_cnt; + + atomic64_t ktls_conn_id_gen; + + atomic_t ktls_no_space; + + struct { + struct sk_buff_head queue; + wait_queue_head_t wq; + struct workqueue_struct *workq; + struct work_struct wait_work; + struct work_struct runq_work; + u16 tag; + } mbox_cmsg; + struct dentry *debugfs_dir; struct list_head vnic_list; @@ -848,12 +895,17 @@ static inline void nfp_ctrl_unlock(struct nfp_net *nn) static inline void nn_ctrl_bar_lock(struct nfp_net *nn) { - mutex_lock(&nn->bar_lock); + down(&nn->bar_lock); +} + +static inline bool nn_ctrl_bar_trylock(struct nfp_net *nn) +{ + return !down_trylock(&nn->bar_lock); } static inline void nn_ctrl_bar_unlock(struct nfp_net *nn) { - mutex_unlock(&nn->bar_lock); + up(&nn->bar_lock); } /* Globals */ @@ -883,6 +935,7 @@ void nfp_ctrl_close(struct nfp_net *nn); void nfp_net_set_ethtool_ops(struct net_device *netdev); void nfp_net_info(struct nfp_net *nn); +int __nfp_net_reconfig(struct nfp_net *nn, u32 update); int nfp_net_reconfig(struct nfp_net *nn, u32 update); unsigned int nfp_net_rss_key_sz(struct nfp_net *nn); void nfp_net_rss_write_itbl(struct nfp_net *nn); @@ -891,6 +944,8 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn); int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size); int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd); int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd); +void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update); +int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn); unsigned int nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 36a3bd30cfd9..9903805717da 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -23,7 +23,6 @@ #include <linux/interrupt.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/lockdep.h> #include <linux/mm.h> #include <linux/overflow.h> #include <linux/page_ref.h> 
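A note on the bar_lock conversion visible above (mutex_lock/mutex_unlock replaced by down/up): a kernel mutex must be unlocked by the task that locked it, while a semaphore has no owner, so it can be taken when a posted mailbox command is submitted and released later from a completion work item running in a different context. The following is a minimal sketch of that pattern only; the names dev_state, post_config and config_done are hypothetical and not taken from the nfp driver.

#include <linux/kernel.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>

struct dev_state {
	struct semaphore cfg_lock;	/* no owner: any context may up() it */
	struct work_struct done_work;
};

/* Completion side: runs from a workqueue, i.e. a different task than the
 * submitter. Releasing a mutex here would be invalid; releasing a
 * semaphore is fine.
 */
static void config_done(struct work_struct *work)
{
	struct dev_state *ds = container_of(work, struct dev_state, done_work);

	up(&ds->cfg_lock);
}

static void dev_state_init(struct dev_state *ds)
{
	sema_init(&ds->cfg_lock, 1);	/* count of 1: behaves as a lock */
	INIT_WORK(&ds->done_work, config_done);
}

/* Submission side: take the lock, post the command, return without
 * waiting; the lock is dropped when the completion work fires.
 */
static void post_config(struct dev_state *ds)
{
	down(&ds->cfg_lock);
	/* ... write the command to the device mailbox here ... */
	schedule_work(&ds->done_work);
}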
@@ -37,14 +36,17 @@ #include <linux/vmalloc.h> #include <linux/ktime.h> +#include <net/tls.h> #include <net/vxlan.h> #include "nfpcore/nfp_nsp.h" +#include "ccm.h" #include "nfp_app.h" #include "nfp_net_ctrl.h" #include "nfp_net.h" #include "nfp_net_sriov.h" #include "nfp_port.h" +#include "crypto/crypto.h" /** * nfp_net_get_fw_version() - Read and parse the FW version @@ -228,6 +230,7 @@ static void nfp_net_reconfig_sync_enter(struct nfp_net *nn) spin_lock_bh(&nn->reconfig_lock); + WARN_ON(nn->reconfig_sync_present); nn->reconfig_sync_present = true; if (nn->reconfig_timer_active) { @@ -271,12 +274,10 @@ static void nfp_net_reconfig_wait_posted(struct nfp_net *nn) * * Return: Negative errno on error, 0 on success */ -static int __nfp_net_reconfig(struct nfp_net *nn, u32 update) +int __nfp_net_reconfig(struct nfp_net *nn, u32 update) { int ret; - lockdep_assert_held(&nn->bar_lock); - nfp_net_reconfig_sync_enter(nn); nfp_net_reconfig_start(nn, update); @@ -331,7 +332,6 @@ int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd) u32 mbox = nn->tlv_caps.mbox_off; int ret; - lockdep_assert_held(&nn->bar_lock); nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX); @@ -343,6 +343,24 @@ int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd) return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET); } +void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 mbox_cmd) +{ + u32 mbox = nn->tlv_caps.mbox_off; + + nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); + + nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_MBOX); +} + +int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn) +{ + u32 mbox = nn->tlv_caps.mbox_off; + + nfp_net_reconfig_wait_posted(nn); + + return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET); +} + int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd) { int ret; @@ -804,6 +822,99 @@ static void nfp_net_tx_csum(struct nfp_net_dp *dp, u64_stats_update_end(&r_vec->tx_sync); } +static struct sk_buff * +nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, u64 *tls_handle, int *nr_frags) +{ +#ifdef CONFIG_TLS_DEVICE + struct nfp_net_tls_offload_ctx *ntls; + struct sk_buff *nskb; + bool resync_pending; + u32 datalen, seq; + + if (likely(!dp->ktls_tx)) + return skb; + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + return skb; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + seq = ntohl(tcp_hdr(skb)->seq); + ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + resync_pending = tls_offload_tx_resync_pending(skb->sk); + if (unlikely(resync_pending || ntls->next_seq != seq)) { + /* Pure ACK out of order already */ + if (!datalen) + return skb; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + + nskb = tls_encrypt_skb(skb); + if (!nskb) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_no_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + return NULL; + } + /* encryption wasn't necessary */ + if (nskb == skb) + return skb; + /* we don't re-check ring space */ + if (unlikely(skb_is_nonlinear(nskb))) { + nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n"); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(nskb); + return NULL; + } + + /* jump forward, a TX may have gotten lost, need to sync TX */ + if (!resync_pending && seq - ntls->next_seq < U32_MAX 
/ 4) + tls_offload_tx_resync_request(nskb->sk); + + *nr_frags = 0; + return nskb; + } + + if (datalen) { + u64_stats_update_begin(&r_vec->tx_sync); + if (!skb_is_gso(skb)) + r_vec->hw_tls_tx++; + else + r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs; + u64_stats_update_end(&r_vec->tx_sync); + } + + memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle)); + ntls->next_seq += datalen; +#endif + return skb; +} + +static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) +{ +#ifdef CONFIG_TLS_DEVICE + struct nfp_net_tls_offload_ctx *ntls; + u32 datalen, seq; + + if (!tls_handle) + return; + if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))) + return; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + seq = ntohl(tcp_hdr(skb)->seq); + + ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + if (ntls->next_seq == seq + datalen) + ntls->next_seq = seq; + else + WARN_ON_ONCE(1); +#endif +} + static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) { wmb(); @@ -811,24 +922,47 @@ static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) tx_ring->wr_ptr_add = 0; } -static int nfp_net_prep_port_id(struct sk_buff *skb) +static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle) { struct metadata_dst *md_dst = skb_metadata_dst(skb); unsigned char *data; + u32 meta_id = 0; + int md_bytes; - if (likely(!md_dst)) - return 0; - if (unlikely(md_dst->type != METADATA_HW_PORT_MUX)) + if (likely(!md_dst && !tls_handle)) return 0; + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) { + if (!tls_handle) + return 0; + md_dst = NULL; + } + + md_bytes = 4 + !!md_dst * 4 + !!tls_handle * 8; - if (unlikely(skb_cow_head(skb, 8))) + if (unlikely(skb_cow_head(skb, md_bytes))) return -ENOMEM; - data = skb_push(skb, 8); - put_unaligned_be32(NFP_NET_META_PORTID, data); - put_unaligned_be32(md_dst->u.port_info.port_id, data + 4); + meta_id = 0; + data = skb_push(skb, md_bytes) + md_bytes; + if (md_dst) { + data -= 4; + put_unaligned_be32(md_dst->u.port_info.port_id, data); + meta_id = NFP_NET_META_PORTID; + } + if (tls_handle) { + /* conn handle is opaque, we just use u64 to be able to quickly + * compare it to zero + */ + data -= 8; + memcpy(data, &tls_handle, sizeof(tls_handle)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_CONN_HANDLE; + } + + data -= 4; + put_unaligned_be32(meta_id, data); - return 8; + return md_bytes; } /** @@ -851,6 +985,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) struct nfp_net_dp *dp; dma_addr_t dma_addr; unsigned int fsize; + u64 tls_handle = 0; u16 qidx; dp = &nn->dp; @@ -872,18 +1007,21 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - md_bytes = nfp_net_prep_port_id(skb); - if (unlikely(md_bytes < 0)) { + skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags); + if (unlikely(!skb)) { nfp_net_tx_xmit_more_flush(tx_ring); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + md_bytes = nfp_net_prep_tx_meta(skb, tls_handle); + if (unlikely(md_bytes < 0)) + goto err_flush; + /* Start with the head skbuf */ dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(dp->dev, dma_addr)) - goto err_free; + goto err_dma_err; wr_idx = D_IDX(tx_ring, tx_ring->wr_p); @@ -979,12 +1117,14 @@ err_unmap: tx_ring->txbufs[wr_idx].skb = NULL; tx_ring->txbufs[wr_idx].dma_addr = 0; tx_ring->txbufs[wr_idx].fidx = -2; -err_free: +err_dma_err: nn_dp_warn(dp, 
"Failed to map DMA TX buffer\n"); +err_flush: nfp_net_tx_xmit_more_flush(tx_ring); u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_errors++; u64_stats_update_end(&r_vec->tx_sync); + nfp_net_tls_tx_undo(skb, tls_handle); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1857,6 +1997,15 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb); +#ifdef CONFIG_TLS_DEVICE + if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) { + skb->decrypted = true; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_tls_rx++; + u64_stats_update_end(&r_vec->rx_sync); + } +#endif + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxd->rxd.vlan)); @@ -3705,7 +3854,7 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; - mutex_init(&nn->bar_lock); + sema_init(&nn->bar_lock, 1); spin_lock_init(&nn->reconfig_lock); spin_lock_init(&nn->link_status_lock); @@ -3717,6 +3866,10 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, if (err) goto err_free_nn; + err = nfp_ccm_mbox_alloc(nn); + if (err) + goto err_free_nn; + return nn; err_free_nn: @@ -3734,8 +3887,7 @@ err_free_nn: void nfp_net_free(struct nfp_net *nn) { WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); - - mutex_destroy(&nn->bar_lock); + nfp_ccm_mbox_free(nn); if (nn->dp.netdev) free_netdev(nn->dp.netdev); @@ -4010,14 +4162,27 @@ int nfp_net_init(struct nfp_net *nn) if (err) return err; - if (nn->dp.netdev) + if (nn->dp.netdev) { nfp_net_netdev_init(nn); + err = nfp_ccm_mbox_init(nn); + if (err) + return err; + + err = nfp_net_tls_init(nn); + if (err) + goto err_clean_mbox; + } + nfp_net_vecs_init(nn); if (!nn->dp.netdev) return 0; return register_netdev(nn->dp.netdev); + +err_clean_mbox: + nfp_ccm_mbox_clean(nn); + return err; } /** @@ -4030,5 +4195,6 @@ void nfp_net_clean(struct nfp_net *nn) return; unregister_netdev(nn->dp.netdev); + nfp_ccm_mbox_clean(nn); nfp_net_reconfig_wait_posted(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c index 6d5213b5bcb0..d835c14b7257 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -99,6 +99,21 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, caps->repr_cap = readl(data); break; + case NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + if (length >= 4) + caps->mbox_cmsg_types = readl(data); + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return -EINVAL; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + break; default: if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) break; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 25919e338071..ee6b24e4eacd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -44,6 +44,7 @@ #define NFP_NET_META_MARK 2 #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ +#define NFP_NET_META_CONN_HANDLE 7 #define NFP_META_PORT_ID_CTRL ~0U @@ -135,6 +136,7 @@ #define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */ #define 
NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */ #define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */ +#define NFP_NET_CFG_UPDATE_CRYPTO (0x1 << 14) /* Crypto on/off */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* An error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -394,6 +396,7 @@ #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2 #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 +#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 /** * VLAN filtering using general use mailbox @@ -466,6 +469,16 @@ * %NFP_NET_CFG_TLV_TYPE_REPR_CAP: * Single word, equivalent of %NFP_NET_CFG_CAP for representors, features which * can be used on representors. + * + * %NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + * Variable, bitmap of control message types supported by the mailbox handler. + * Bit 0 corresponds to message type 0, bit 1 to 1, etc. Control messages are + * encapsulated into simple TLVs, terminated with an end TLV, and written to the Mailbox. + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + * 8 words, bitmaps of supported and enabled crypto operations. + * The first 16B (4 words) contain a bitmap of supported crypto operations, + * and the next 16B contain the enabled operations. */ #define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 #define NFP_NET_CFG_TLV_TYPE_RESERVED 1 @@ -475,6 +488,8 @@ #define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0 5 #define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1 6 #define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 +#define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ struct device; @@ -484,12 +499,18 @@ struct device; * @mbox_off: vNIC mailbox area offset * @mbox_len: vNIC mailbox area length * @repr_cap: capabilities for representors + * @mbox_cmsg_types: cmsgs which can be passed through the mailbox + * @crypto_ops: supported crypto operations + * @crypto_enable_off: offset of crypto ops enable region */ struct nfp_net_tlv_caps { u32 me_freq_mhz; unsigned int mbox_off; unsigned int mbox_len; u32 repr_cap; + u32 mbox_cmsg_types; + u32 crypto_ops; + unsigned int crypto_enable_off; }; int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 851e31e0ba8e..d9cbe84ac6ad 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -150,8 +150,9 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 -#define NN_RVEC_GATHER_STATS 9 +#define NN_RVEC_GATHER_STATS 13 #define NN_RVEC_PER_Q_STATS 3 +#define NN_CTRL_PATH_STATS 1 #define SFP_SFF_REV_COMPLIANCE 1 @@ -423,7 +424,8 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); - return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS; + return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS + + NN_CTRL_PATH_STATS; } static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) @@ -442,10 +444,16 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "hw_rx_csum_complete"); data = nfp_pr_et(data, "hw_rx_csum_err"); data = nfp_pr_et(data, "rx_replace_buf_alloc_fail"); + data = nfp_pr_et(data, "rx_tls_decrypted"); data = nfp_pr_et(data, "hw_tx_csum"); data = nfp_pr_et(data, "hw_tx_inner_csum"); data = nfp_pr_et(data, 
"tx_gather"); data = nfp_pr_et(data, "tx_lso"); + data = nfp_pr_et(data, "tx_tls_encrypted"); + data = nfp_pr_et(data, "tx_tls_ooo"); + data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); + + data = nfp_pr_et(data, "hw_tls_no_space"); return data; } @@ -468,16 +476,20 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[2] = nn->r_vecs[i].hw_csum_rx_complete; tmp[3] = nn->r_vecs[i].hw_csum_rx_error; tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; + tmp[5] = nn->r_vecs[i].hw_tls_rx; } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); do { start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; - tmp[5] = nn->r_vecs[i].hw_csum_tx; - tmp[6] = nn->r_vecs[i].hw_csum_tx_inner; - tmp[7] = nn->r_vecs[i].tx_gather; - tmp[8] = nn->r_vecs[i].tx_lso; + tmp[6] = nn->r_vecs[i].hw_csum_tx; + tmp[7] = nn->r_vecs[i].hw_csum_tx_inner; + tmp[8] = nn->r_vecs[i].tx_gather; + tmp[9] = nn->r_vecs[i].tx_lso; + tmp[10] = nn->r_vecs[i].hw_tls_tx; + tmp[11] = nn->r_vecs[i].tls_tx_fallback; + tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); data += NN_RVEC_PER_Q_STATS; @@ -489,6 +501,8 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) for (j = 0; j < NN_RVEC_GATHER_STATS; j++) *data++ = gathered_stats[j]; + *data++ = atomic_read(&nn->ktls_no_space); + return data; } diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 42cf4fd875ea..9a08623c325d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -241,11 +241,16 @@ static int nfp_nsp_check(struct nfp_nsp *state) state->ver.major = FIELD_GET(NSP_STATUS_MAJOR, reg); state->ver.minor = FIELD_GET(NSP_STATUS_MINOR, reg); - if (state->ver.major != NSP_MAJOR || state->ver.minor < NSP_MINOR) { + if (state->ver.major != NSP_MAJOR) { nfp_err(cpp, "Unsupported ABI %hu.%hu\n", state->ver.major, state->ver.minor); return -EINVAL; } + if (state->ver.minor < NSP_MINOR) { + nfp_err(cpp, "ABI too old to support NIC operation (%u.%hu < %u.%u), please update the management FW on the flash\n", + NSP_MAJOR, state->ver.minor, NSP_MAJOR, NSP_MINOR); + return -EINVAL; + } if (reg & NSP_STATUS_BUSY) { nfp_err(cpp, "Service processor busy!\n"); diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 96f7a9818294..0b384f97d2fd 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -990,7 +990,7 @@ static void nixge_ethtools_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *ed) { strlcpy(ed->driver, "nixge", sizeof(ed->driver)); - strlcpy(ed->bus_info, "platform", sizeof(ed->driver)); + strlcpy(ed->bus_info, "platform", sizeof(ed->bus_info)); } static int nixge_ethtools_get_coalesce(struct net_device *ndev, diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index bf5a7bca0298..be6660128b55 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1042,7 +1042,6 @@ static int pasemi_mac_phy_init(struct net_device *dev) dn = pci_device_to_OF_node(mac->pdev); phy_dn = of_parse_phandle(dn, "phy-handle", 0); - of_node_put(phy_dn); mac->link = 0; mac->speed = 0; @@ -1051,6 +1050,7 @@ static int pasemi_mac_phy_init(struct net_device *dev) phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0, 
PHY_INTERFACE_MODE_SGMII); + of_node_put(phy_dn); if (!phydev) { printk(KERN_ERR "%s: Could not attach to phy\n", dev->name); return -ENODEV; diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index fdbb3ce00e20..a391cf6ee4b2 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -87,6 +87,7 @@ config QED depends on PCI select ZLIB_INFLATE select CRC8 + select NET_DEVLINK ---help--- This enables the support for ... diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 84cb62434556..58e2eaf77014 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -3248,6 +3248,7 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, struct net_device *dev, unsigned long event) { struct in_device *indev; + struct in_ifaddr *ifa; if (!netxen_destip_supported(adapter)) return; @@ -3256,7 +3257,8 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, if (!indev) return; - for_ifa(indev) { + rcu_read_lock(); + in_dev_for_each_ifa_rcu(ifa, indev) { switch (event) { case NETDEV_UP: netxen_list_config_ip(adapter, ifa, NX_IP_UP); @@ -3267,8 +3269,8 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, default: break; } - } endfor_ifa(indev); - + } + rcu_read_unlock(); in_dev_put(indev); } diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index c5e96ce20f59..89fe091c958d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -140,6 +140,7 @@ struct qed_cxt_mngr; struct qed_sb_sp_info; struct qed_ll2_info; struct qed_mcp_info; +struct qed_llh_info; struct qed_rt_data { u32 *init_val; @@ -741,6 +742,7 @@ struct qed_dev { #define QED_DEV_ID_MASK 0xff00 #define QED_DEV_ID_MASK_BB 0x1600 #define QED_DEV_ID_MASK_AH 0x8000 +#define QED_IS_E4(dev) (QED_IS_BB(dev) || QED_IS_AH(dev)) u16 chip_num; #define CHIP_NUM_MASK 0xffff @@ -801,6 +803,11 @@ struct qed_dev { u8 num_hwfns; struct qed_hwfn hwfns[MAX_HWFNS_PER_DEVICE]; + /* Engine affinity */ + u8 l2_affin_hint; + u8 fir_affin; + u8 iwarp_affin; + /* SRIOV */ struct qed_hw_sriov_info *p_iov_info; #define IS_QED_SRIOV(cdev) (!!(cdev)->p_iov_info) @@ -815,6 +822,10 @@ struct qed_dev { /* Recovery */ bool recov_in_prog; + /* LLH info */ + u8 ppfid_bitmap; + struct qed_llh_info *p_llh_info; + /* Linux specific here */ struct qede_dev *edev; struct pci_dev *pdev; @@ -852,6 +863,9 @@ struct qed_dev { u32 rdma_max_inline; u32 rdma_max_srq_sge; u16 tunn_feature_mask; + + struct devlink *dl; + bool iwarp_cmt; }; #define NUM_OF_VFS(dev) (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \ @@ -904,6 +918,14 @@ void qed_set_fw_mac_addr(__le16 *fw_msb, __le16 *fw_mid, __le16 *fw_lsb, u8 *mac); #define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) +#define QED_IS_CMT(dev) ((dev)->num_hwfns > 1) +/* Macros for getting the engine-affinitized hwfn (FIR: fcoe,iscsi,roce) */ +#define QED_FIR_AFFIN_HWFN(dev) (&(dev)->hwfns[dev->fir_affin]) +#define QED_IWARP_AFFIN_HWFN(dev) (&(dev)->hwfns[dev->iwarp_affin]) +#define QED_AFFIN_HWFN(dev) \ + (QED_IS_IWARP_PERSONALITY(QED_LEADING_HWFN(dev)) ? \ + QED_IWARP_AFFIN_HWFN(dev) : QED_FIR_AFFIN_HWFN(dev)) +#define QED_AFFIN_HWFN_IDX(dev) (IS_LEAD_HWFN(QED_AFFIN_HWFN(dev)) ? 
0 : 1) /* Flags for indication of required queues */ #define PQ_FLAGS_RLS (BIT(0)) @@ -923,8 +945,6 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf); u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc); u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); -#define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) - /* doorbell recovery mechanism */ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn); void qed_db_recovery_execute(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index e61d1d905415..8e1bdf58b9e7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2351,7 +2351,8 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, /* Write via DMAE since the PSWRQ2_REG_ILT_MEMORY line is a wide-bus */ qed_dmae_host2grc(p_hwfn, p_ptt, (u64) (uintptr_t)&ilt_hw_entry, - reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), 0); + reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), + NULL); if (elem_type == QED_ELEM_CXT) { u32 last_cid_allocated = (1 + (iid / elems_per_p)) * @@ -2457,7 +2458,7 @@ qed_cxt_free_ilt_range(struct qed_hwfn *p_hwfn, (u64) (uintptr_t) &ilt_hw_entry, reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), - 0); + NULL); } qed_ptt_release(p_hwfn, p_ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index ab8cacbdee3e..5ea6c4fc6050 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -2534,7 +2534,7 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn, (len >= s_platform_defs[dev_data->platform_id].dmae_thresh || wide_bus)) { if (!qed_dmae_grc2host(p_hwfn, p_ptt, DWORDS_TO_BYTES(addr), - (u64)(uintptr_t)(dump_buf), len, 0)) + (u64)(uintptr_t)(dump_buf), len, NULL)) return len; dev_data->use_dmae = 0; DP_VERBOSE(p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index fccdb06fc5c5..a1ebc2b1ca0b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -361,6 +361,927 @@ void qed_db_recovery_execute(struct qed_hwfn *p_hwfn) /******************** Doorbell Recovery end ****************/ +/********************************** NIG LLH ***********************************/ + +enum qed_llh_filter_type { + QED_LLH_FILTER_TYPE_MAC, + QED_LLH_FILTER_TYPE_PROTOCOL, +}; + +struct qed_llh_mac_filter { + u8 addr[ETH_ALEN]; +}; + +struct qed_llh_protocol_filter { + enum qed_llh_prot_filter_type_t type; + u16 source_port_or_eth_type; + u16 dest_port; +}; + +union qed_llh_filter { + struct qed_llh_mac_filter mac; + struct qed_llh_protocol_filter protocol; +}; + +struct qed_llh_filter_info { + bool b_enabled; + u32 ref_cnt; + enum qed_llh_filter_type type; + union qed_llh_filter filter; +}; + +struct qed_llh_info { + /* Number of LLH filter banks */ + u8 num_ppfid; + +#define MAX_NUM_PPFID 8 + u8 ppfid_array[MAX_NUM_PPFID]; + + /* Array of filter arrays: + * "num_ppfid" filter banks, where each is an array of + * "NIG_REG_LLH_FUNC_FILTER_EN_SIZE" filters. 
+ */ + struct qed_llh_filter_info **pp_filters; +}; + +static void qed_llh_free(struct qed_dev *cdev) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + u32 i; + + if (p_llh_info) { + if (p_llh_info->pp_filters) + for (i = 0; i < p_llh_info->num_ppfid; i++) + kfree(p_llh_info->pp_filters[i]); + + kfree(p_llh_info->pp_filters); + } + + kfree(p_llh_info); + cdev->p_llh_info = NULL; +} + +static int qed_llh_alloc(struct qed_dev *cdev) +{ + struct qed_llh_info *p_llh_info; + u32 size, i; + + p_llh_info = kzalloc(sizeof(*p_llh_info), GFP_KERNEL); + if (!p_llh_info) + return -ENOMEM; + cdev->p_llh_info = p_llh_info; + + for (i = 0; i < MAX_NUM_PPFID; i++) { + if (!(cdev->ppfid_bitmap & (0x1 << i))) + continue; + + p_llh_info->ppfid_array[p_llh_info->num_ppfid] = i; + DP_VERBOSE(cdev, QED_MSG_SP, "ppfid_array[%d] = %hhd\n", + p_llh_info->num_ppfid, i); + p_llh_info->num_ppfid++; + } + + size = p_llh_info->num_ppfid * sizeof(*p_llh_info->pp_filters); + p_llh_info->pp_filters = kzalloc(size, GFP_KERNEL); + if (!p_llh_info->pp_filters) + return -ENOMEM; + + size = NIG_REG_LLH_FUNC_FILTER_EN_SIZE * + sizeof(**p_llh_info->pp_filters); + for (i = 0; i < p_llh_info->num_ppfid; i++) { + p_llh_info->pp_filters[i] = kzalloc(size, GFP_KERNEL); + if (!p_llh_info->pp_filters[i]) + return -ENOMEM; + } + + return 0; +} + +static int qed_llh_shadow_sanity(struct qed_dev *cdev, + u8 ppfid, u8 filter_idx, const char *action) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + + if (ppfid >= p_llh_info->num_ppfid) { + DP_NOTICE(cdev, + "LLH shadow [%s]: using ppfid %d while only %d ppfids are available\n", + action, ppfid, p_llh_info->num_ppfid); + return -EINVAL; + } + + if (filter_idx >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { + DP_NOTICE(cdev, + "LLH shadow [%s]: using filter_idx %d while only %d filters are available\n", + action, filter_idx, NIG_REG_LLH_FUNC_FILTER_EN_SIZE); + return -EINVAL; + } + + return 0; +} + +#define QED_LLH_INVALID_FILTER_IDX 0xff + +static int +qed_llh_shadow_search_filter(struct qed_dev *cdev, + u8 ppfid, + union qed_llh_filter *p_filter, u8 *p_filter_idx) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + u8 i; + + rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "search"); + if (rc) + return rc; + + *p_filter_idx = QED_LLH_INVALID_FILTER_IDX; + + p_filters = p_llh_info->pp_filters[ppfid]; + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + if (!memcmp(p_filter, &p_filters[i].filter, + sizeof(*p_filter))) { + *p_filter_idx = i; + break; + } + } + + return 0; +} + +static int +qed_llh_shadow_get_free_idx(struct qed_dev *cdev, u8 ppfid, u8 *p_filter_idx) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + u8 i; + + rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "get_free_idx"); + if (rc) + return rc; + + *p_filter_idx = QED_LLH_INVALID_FILTER_IDX; + + p_filters = p_llh_info->pp_filters[ppfid]; + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + if (!p_filters[i].b_enabled) { + *p_filter_idx = i; + break; + } + } + + return 0; +} + +static int +__qed_llh_shadow_add_filter(struct qed_dev *cdev, + u8 ppfid, + u8 filter_idx, + enum qed_llh_filter_type type, + union qed_llh_filter *p_filter, u32 *p_ref_cnt) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + + rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "add"); + if (rc) + return rc; + + p_filters = p_llh_info->pp_filters[ppfid]; + if 
(!p_filters[filter_idx].ref_cnt) { + p_filters[filter_idx].b_enabled = true; + p_filters[filter_idx].type = type; + memcpy(&p_filters[filter_idx].filter, p_filter, + sizeof(p_filters[filter_idx].filter)); + } + + *p_ref_cnt = ++p_filters[filter_idx].ref_cnt; + + return 0; +} + +static int +qed_llh_shadow_add_filter(struct qed_dev *cdev, + u8 ppfid, + enum qed_llh_filter_type type, + union qed_llh_filter *p_filter, + u8 *p_filter_idx, u32 *p_ref_cnt) +{ + int rc; + + /* Check if the same filter already exists */ + rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx); + if (rc) + return rc; + + /* Find a new entry in case of a new filter */ + if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) { + rc = qed_llh_shadow_get_free_idx(cdev, ppfid, p_filter_idx); + if (rc) + return rc; + } + + /* No free entry was found */ + if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) { + DP_NOTICE(cdev, + "Failed to find an empty LLH filter to utilize [ppfid %d]\n", + ppfid); + return -EINVAL; + } + + return __qed_llh_shadow_add_filter(cdev, ppfid, *p_filter_idx, type, + p_filter, p_ref_cnt); +} + +static int +__qed_llh_shadow_remove_filter(struct qed_dev *cdev, + u8 ppfid, u8 filter_idx, u32 *p_ref_cnt) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + + rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "remove"); + if (rc) + return rc; + + p_filters = p_llh_info->pp_filters[ppfid]; + if (!p_filters[filter_idx].ref_cnt) { + DP_NOTICE(cdev, + "LLH shadow: trying to remove a filter with ref_cnt=0\n"); + return -EINVAL; + } + + *p_ref_cnt = --p_filters[filter_idx].ref_cnt; + if (!p_filters[filter_idx].ref_cnt) + memset(&p_filters[filter_idx], + 0, sizeof(p_filters[filter_idx])); + + return 0; +} + +static int +qed_llh_shadow_remove_filter(struct qed_dev *cdev, + u8 ppfid, + union qed_llh_filter *p_filter, + u8 *p_filter_idx, u32 *p_ref_cnt) +{ + int rc; + + rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx); + if (rc) + return rc; + + /* No matching filter was found */ + if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) { + DP_NOTICE(cdev, "Failed to find a filter in the LLH shadow\n"); + return -EINVAL; + } + + return __qed_llh_shadow_remove_filter(cdev, ppfid, *p_filter_idx, + p_ref_cnt); +} + +static int qed_llh_abs_ppfid(struct qed_dev *cdev, u8 ppfid, u8 *p_abs_ppfid) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + + if (ppfid >= p_llh_info->num_ppfid) { + DP_NOTICE(cdev, + "ppfid %d is not valid, available indices are 0..%hhd\n", + ppfid, p_llh_info->num_ppfid - 1); + *p_abs_ppfid = 0; + return -EINVAL; + } + + *p_abs_ppfid = p_llh_info->ppfid_array[ppfid]; + + return 0; +} + +static int +qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + enum qed_eng eng; + u8 ppfid; + int rc; + + rc = qed_mcp_get_engine_config(p_hwfn, p_ptt); + if (rc != 0 && rc != -EOPNOTSUPP) { + DP_NOTICE(p_hwfn, + "Failed to get the engine affinity configuration\n"); + return rc; + } + + /* RoCE PF is bound to a single engine */ + if (QED_IS_ROCE_PERSONALITY(p_hwfn)) { + eng = cdev->fir_affin ? 
+
+static int
+qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ enum qed_eng eng;
+ u8 ppfid;
+ int rc;
+
+ rc = qed_mcp_get_engine_config(p_hwfn, p_ptt);
+ if (rc != 0 && rc != -EOPNOTSUPP) {
+ DP_NOTICE(p_hwfn,
+ "Failed to get the engine affinity configuration\n");
+ return rc;
+ }
+
+ /* RoCE PF is bound to a single engine */
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn)) {
+ eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+ rc = qed_llh_set_roce_affinity(cdev, eng);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to set the RoCE engine affinity\n");
+ return rc;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Set the engine affinity of RoCE packets as %d\n",
+ eng);
+ }
+
+ /* Storage PF is bound to a single engine while L2 PF uses both */
+ if (QED_IS_FCOE_PERSONALITY(p_hwfn) || QED_IS_ISCSI_PERSONALITY(p_hwfn))
+ eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+ else /* L2_PERSONALITY */
+ eng = QED_BOTH_ENG;
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to set the engine affinity of ppfid %d\n",
+ ppfid);
+ return rc;
+ }
+ }
+
+ DP_VERBOSE(cdev, QED_MSG_SP,
+ "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+ eng);
+
+ return 0;
+}
+
+static int qed_llh_hw_init_pf(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ u8 ppfid, abs_ppfid;
+ int rc;
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ u32 addr;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ return rc;
+
+ addr = NIG_REG_LLH_PPFID2PFID_TBL_0 + abs_ppfid * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
+ }
+
+ if (test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+ !QED_IS_FCOE_PERSONALITY(p_hwfn)) {
+ rc = qed_llh_add_mac_filter(cdev, 0,
+ p_hwfn->hw_info.hw_mac_addr);
+ if (rc)
+ DP_NOTICE(cdev,
+ "Failed to add an LLH filter with the primary MAC\n");
+ }
+
+ if (QED_IS_CMT(cdev)) {
+ rc = qed_llh_set_engine_affin(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev)
+{
+ return cdev->p_llh_info->num_ppfid;
+}
+
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_MASK 0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_SHIFT 0
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_MASK 0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_SHIFT 2
+
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev, u8 ppfid, enum qed_eng eng)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u32 addr, val, eng_sel;
+ u8 abs_ppfid;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!QED_IS_CMT(cdev))
+ goto out;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto out;
+
+ switch (eng) {
+ case QED_ENG0:
+ eng_sel = 0;
+ break;
+ case QED_ENG1:
+ eng_sel = 1;
+ break;
+ case QED_BOTH_ENG:
+ eng_sel = 2;
+ break;
+ default:
+ DP_NOTICE(cdev, "Invalid affinity value for ppfid [%d]\n", eng);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+ val = qed_rd(p_hwfn, p_ptt, addr);
+ SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE, eng_sel);
+ qed_wr(p_hwfn, p_ptt, addr, val);
+
+ /* The iWARP affinity is set as the affinity of ppfid 0 */
+ if (!ppfid && QED_IS_IWARP_PERSONALITY(p_hwfn))
+ cdev->iwarp_affin = (eng == QED_ENG1) ? 1 : 0;
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u32 addr, val, eng_sel;
+ u8 ppfid, abs_ppfid;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!QED_IS_CMT(cdev))
+ goto out;
+
+ switch (eng) {
+ case QED_ENG0:
+ eng_sel = 0;
+ break;
+ case QED_ENG1:
+ eng_sel = 1;
+ break;
+ case QED_BOTH_ENG:
+ eng_sel = 2;
+ qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL,
+ 0xf); /* QP bit 15 */
+ break;
+ default:
+ DP_NOTICE(cdev, "Invalid affinity value for RoCE [%d]\n", eng);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto out;
+
+ addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+ val = qed_rd(p_hwfn, p_ptt, addr);
+ SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_ROCE, eng_sel);
+ qed_wr(p_hwfn, p_ptt, addr, val);
+ }
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+struct qed_llh_filter_details {
+ u64 value;
+ u32 mode;
+ u32 protocol_type;
+ u32 hdr_sel;
+ u32 enable;
+};
+
+static int
+qed_llh_access_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 abs_ppfid,
+ u8 filter_idx,
+ struct qed_llh_filter_details *p_details)
+{
+ struct qed_dmae_params params = {0};
+ u32 addr;
+ u8 pfid;
+ int rc;
+
+ /* The NIG/LLH registers that are accessed in this function have only 16
+ * rows which are exposed to a PF. I.e. only the 16 filters of its
+ * default ppfid. Accessing filters of other ppfids requires pretending
+ * to be other PFs.
+ * The calculation of PPFID->PFID in AH is based on the relative index
+ * of a PF on its port.
+ * For BB the pfid is actually the abs_ppfid.
+ */
+ if (QED_IS_BB(p_hwfn->cdev))
+ pfid = abs_ppfid;
+ else
+ pfid = abs_ppfid * p_hwfn->cdev->num_ports_in_engine +
+ MFW_PORT(p_hwfn);
+
+ /* Filter enable - should be done first when removing a filter */
+ if (!p_details->enable) {
+ qed_fid_pretend(p_hwfn, p_ptt,
+ pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ p_hwfn->rel_pf_id <<
+ PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+ }
+
+ /* Filter value */
+ addr = NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * filter_idx * 0x4;
+
+ params.flags = QED_DMAE_FLAG_PF_DST;
+ params.dst_pfid = pfid;
+ rc = qed_dmae_host2grc(p_hwfn,
+ p_ptt,
+ (u64)(uintptr_t)&p_details->value,
+ addr, 2 /* size_in_dwords */,
+ &params);
+ if (rc)
+ return rc;
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ /* Filter mode */
+ addr = NIG_REG_LLH_FUNC_FILTER_MODE + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->mode);
+
+ /* Filter protocol type */
+ addr = NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->protocol_type);
+
+ /* Filter header select */
+ addr = NIG_REG_LLH_FUNC_FILTER_HDR_SEL + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->hdr_sel);
+
+ /* Filter enable - should be done last when adding a filter */
+ if (p_details->enable) {
+ addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+ }
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ p_hwfn->rel_pf_id <<
+ PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ return 0;
+}
+
+static int
+qed_llh_add_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 abs_ppfid,
+ u8 filter_idx, u8 filter_prot_type, u32 high, u32 low)
+{
+ struct qed_llh_filter_details filter_details;
+
+ filter_details.enable = 1;
+ filter_details.value = ((u64)high << 32) | low;
+ filter_details.hdr_sel = 0;
+ filter_details.protocol_type = filter_prot_type;
+ /* Mode: 0: MAC-address classification 1: protocol classification */
+ filter_details.mode = filter_prot_type ? 1 : 0;
+
+ return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ &filter_details);
+}
+
+static int
+qed_llh_remove_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 abs_ppfid, u8 filter_idx)
+{
+ struct qed_llh_filter_details filter_details = {0};
+
+ return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ &filter_details);
+}
+
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ union qed_llh_filter filter = {};
+ u8 filter_idx, abs_ppfid;
+ u32 high, low, ref_cnt;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+ goto out;
+
+ memcpy(filter.mac.addr, mac_addr, ETH_ALEN);
+ rc = qed_llh_shadow_add_filter(cdev, ppfid,
+ QED_LLH_FILTER_TYPE_MAC,
+ &filter, &filter_idx, &ref_cnt);
+ if (rc)
+ goto err;
+
+ /* Configure the LLH only in case of a new filter */
+ if (ref_cnt == 1) {
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ high = mac_addr[1] | (mac_addr[0] << 8);
+ low = mac_addr[5] | (mac_addr[4] << 8) | (mac_addr[3] << 16) |
+ (mac_addr[2] << 24);
+ rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ 0, high, low);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Added MAC filter [%pM] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(cdev,
+ "LLH: Failed to add MAC filter [%pM] to ppfid %hhd\n",
+ mac_addr, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+static int
+qed_llh_protocol_filter_stringify(struct qed_dev *cdev,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type,
+ u16 dest_port, u8 *str, size_t str_len)
+{
+ switch (type) {
+ case QED_LLH_FILTER_ETHERTYPE:
+ snprintf(str, str_len, "Ethertype 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_TCP_SRC_PORT:
+ snprintf(str, str_len, "TCP src port 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_UDP_SRC_PORT:
+ snprintf(str, str_len, "UDP src port 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_TCP_DEST_PORT:
+ snprintf(str, str_len, "TCP dst port 0x%04x", dest_port);
+ break;
+ case QED_LLH_FILTER_UDP_DEST_PORT:
+ snprintf(str, str_len, "UDP dst port 0x%04x", dest_port);
+ break;
+ case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+ snprintf(str, str_len, "TCP src/dst ports 0x%04x/0x%04x",
+ source_port_or_eth_type, dest_port);
+ break;
+ case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+ snprintf(str, str_len, "UDP src/dst ports 0x%04x/0x%04x",
+ source_port_or_eth_type, dest_port);
+ break;
+ default:
+ DP_NOTICE(cdev,
+ "Non valid LLH protocol filter type %d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
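/* [Editor's note] A small standalone illustration of the high/low packing
 * that qed_llh_protocol_filter_to_hilo() below performs: a TCP/UDP
 * src+dst filter packs both ports into the low dword, while an Ethertype
 * filter uses only the high dword. The sample values are made up.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t src_port = 0x1234, dst_port = 0x0050, eth_type = 0x8906;
	uint32_t high, low;

	/* src+dst port filter: src in low[31:16], dst in low[15:0] */
	high = 0;
	low = ((uint32_t)src_port << 16) | dst_port;
	printf("src/dst ports: high=0x%08x low=0x%08x\n",
	       (unsigned)high, (unsigned)low);

	/* Ethertype filter: the type goes into the high dword */
	high = eth_type;
	low = 0;
	printf("ethertype:     high=0x%08x low=0x%08x\n",
	       (unsigned)high, (unsigned)low);
	return 0;
}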
+
+static int
+qed_llh_protocol_filter_to_hilo(struct qed_dev *cdev,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type,
+ u16 dest_port, u32 *p_high, u32 *p_low)
+{
+ *p_high = 0;
+ *p_low = 0;
+
+ switch (type) {
+ case QED_LLH_FILTER_ETHERTYPE:
+ *p_high = source_port_or_eth_type;
+ break;
+ case QED_LLH_FILTER_TCP_SRC_PORT:
+ case QED_LLH_FILTER_UDP_SRC_PORT:
+ *p_low = source_port_or_eth_type << 16;
+ break;
+ case QED_LLH_FILTER_TCP_DEST_PORT:
+ case QED_LLH_FILTER_UDP_DEST_PORT:
+ *p_low = dest_port;
+ break;
+ case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+ case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+ *p_low = (source_port_or_eth_type << 16) | dest_port;
+ break;
+ default:
+ DP_NOTICE(cdev,
+ "Non valid LLH protocol filter type %d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u8 filter_idx, abs_ppfid, str[32], type_bitmap;
+ union qed_llh_filter filter = {};
+ u32 high, low, ref_cnt;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+ goto out;
+
+ rc = qed_llh_protocol_filter_stringify(cdev, type,
+ source_port_or_eth_type,
+ dest_port, str, sizeof(str));
+ if (rc)
+ goto err;
+
+ filter.protocol.type = type;
+ filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+ filter.protocol.dest_port = dest_port;
+ rc = qed_llh_shadow_add_filter(cdev,
+ ppfid,
+ QED_LLH_FILTER_TYPE_PROTOCOL,
+ &filter, &filter_idx, &ref_cnt);
+ if (rc)
+ goto err;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ /* Configure the LLH only in case of a new filter */
+ if (ref_cnt == 1) {
+ rc = qed_llh_protocol_filter_to_hilo(cdev, type,
+ source_port_or_eth_type,
+ dest_port, &high, &low);
+ if (rc)
+ goto err;
+
+ type_bitmap = 0x1 << type;
+ rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid,
+ filter_idx, type_bitmap, high, low);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Added protocol filter [%s] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(p_hwfn,
+ "LLH: Failed to add protocol filter [%s] to ppfid %hhd\n",
+ str, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ union qed_llh_filter filter = {};
+ u8 filter_idx, abs_ppfid;
+ int rc = 0;
+ u32 ref_cnt;
+
+ if (!p_ptt)
+ return;
+
+ if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+ goto out;
+
+ ether_addr_copy(filter.mac.addr, mac_addr);
+ rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+ &ref_cnt);
+ if (rc)
+ goto err;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ /* Remove from the LLH in case the filter is not in use */
+ if (!ref_cnt) {
+ rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+ filter_idx);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Removed MAC filter [%pM] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(cdev,
+ "LLH: Failed to remove MAC filter [%pM] from ppfid %hhd\n",
+ mac_addr, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+}
+
+void qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u8 filter_idx, abs_ppfid, str[32];
+ union qed_llh_filter filter = {};
+ int rc = 0;
+ u32 ref_cnt;
+
+ if (!p_ptt)
+ return;
+
+ if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+ goto out;
+
+ rc = qed_llh_protocol_filter_stringify(cdev, type,
+ source_port_or_eth_type,
+ dest_port, str, sizeof(str));
+ if (rc)
+
goto err; + + filter.protocol.type = type; + filter.protocol.source_port_or_eth_type = source_port_or_eth_type; + filter.protocol.dest_port = dest_port; + rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx, + &ref_cnt); + if (rc) + goto err; + + rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid); + if (rc) + goto err; + + /* Remove from the LLH in case the filter is not in use */ + if (!ref_cnt) { + rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid, + filter_idx); + if (rc) + goto err; + } + + DP_VERBOSE(cdev, + QED_MSG_SP, + "LLH: Removed protocol filter [%s] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n", + str, ppfid, abs_ppfid, filter_idx, ref_cnt); + + goto out; + +err: DP_NOTICE(cdev, + "LLH: Failed to remove protocol filter [%s] from ppfid %hhd\n", + str, ppfid); +out: + qed_ptt_release(p_hwfn, p_ptt); +} + +/******************************* NIG LLH - End ********************************/ + #define QED_MIN_DPIS (4) #define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS) @@ -461,6 +1382,8 @@ void qed_resc_free(struct qed_dev *cdev) kfree(cdev->reset_stats); cdev->reset_stats = NULL; + qed_llh_free(cdev); + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -1428,6 +2351,13 @@ int qed_resc_alloc(struct qed_dev *cdev) goto alloc_err; } + rc = qed_llh_alloc(cdev); + if (rc) { + DP_NOTICE(cdev, + "Failed to allocate memory for the llh_info structure\n"); + goto alloc_err; + } + cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL); if (!cdev->reset_stats) goto alloc_no_mem; @@ -1879,6 +2809,10 @@ static int qed_hw_init_port(struct qed_hwfn *p_hwfn, { int rc = 0; + /* In CMT the gate should be cleared by the 2nd hwfn */ + if (!QED_IS_CMT(p_hwfn->cdev) || !IS_LEAD_HWFN(p_hwfn)) + STORE_RT_REG(p_hwfn, NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET, 0); + rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode); if (rc) return rc; @@ -1964,6 +2898,13 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, if (rc) return rc; + /* Use the leading hwfn since in CMT only NIG #0 is operational */ + if (IS_LEAD_HWFN(p_hwfn)) { + rc = qed_llh_hw_init_pf(p_hwfn, p_ptt); + if (rc) + return rc; + } + if (b_hw_start) { /* enable interrupts */ qed_int_igu_enable(p_hwfn, p_ptt, int_mode); @@ -2393,6 +3334,12 @@ int qed_hw_stop(struct qed_dev *cdev) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0); qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0); + if (IS_LEAD_HWFN(p_hwfn) && + test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) && + !QED_IS_FCOE_PERSONALITY(p_hwfn)) + qed_llh_remove_mac_filter(cdev, 0, + p_hwfn->hw_info.hw_mac_addr); + if (!cdev->recov_in_prog) { rc = qed_mcp_unload_done(p_hwfn, p_ptt); if (rc) { @@ -2868,6 +3815,36 @@ static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn) return 0; } +static int qed_hw_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + u8 native_ppfid_idx; + int rc; + + /* Calculation of BB/AH is different for native_ppfid_idx */ + if (QED_IS_BB(cdev)) + native_ppfid_idx = p_hwfn->rel_pf_id; + else + native_ppfid_idx = p_hwfn->rel_pf_id / + cdev->num_ports_in_engine; + + rc = qed_mcp_get_ppfid_bitmap(p_hwfn, p_ptt); + if (rc != 0 && rc != -EOPNOTSUPP) + return rc; + else if (rc == -EOPNOTSUPP) + cdev->ppfid_bitmap = 0x1 << native_ppfid_idx; + + if (!(cdev->ppfid_bitmap & (0x1 << native_ppfid_idx))) { + DP_INFO(p_hwfn, + "Fix the PPFID bitmap to include the native PPFID [native_ppfid_idx %hhd, orig_bitmap 0x%hhx]\n", + native_ppfid_idx, cdev->ppfid_bitmap); + 
cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+ }
+
+ return 0;
+}
+
 static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 struct qed_resc_unlock_params resc_unlock_params;
@@ -2925,6 +3902,13 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 "Failed to release the resource lock for the resource allocation commands\n");
 }
+ /* PPFID bitmap */
+ if (IS_LEAD_HWFN(p_hwfn)) {
+ rc = qed_hw_get_ppfid_bitmap(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+ }
+
 /* Sanity for ILT */
 if ((b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_K2)) ||
 (!b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB))) {
@@ -3443,6 +4427,7 @@ static void qed_nvm_info_free(struct qed_hwfn *p_hwfn)
 static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 void __iomem *p_regview,
 void __iomem *p_doorbells,
+ u64 db_phys_addr,
 enum qed_pci_personality personality)
 {
 struct qed_dev *cdev = p_hwfn->cdev;
@@ -3451,6 +4436,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 /* Split PCI bars evenly between hwfns */
 p_hwfn->regview = p_regview;
 p_hwfn->doorbells = p_doorbells;
+ p_hwfn->db_phys_addr = db_phys_addr;
 if (IS_VF(p_hwfn->cdev))
 return qed_vf_hw_prepare(p_hwfn);
@@ -3546,7 +4532,9 @@ int qed_hw_prepare(struct qed_dev *cdev,
 /* Initialize the first hwfn - will learn number of hwfns */
 rc = qed_hw_prepare_single(p_hwfn,
 cdev->regview,
- cdev->doorbells, personality);
+ cdev->doorbells,
+ cdev->db_phys_addr,
+ personality);
 if (rc)
 return rc;
@@ -3555,22 +4543,25 @@ int qed_hw_prepare(struct qed_dev *cdev,
 /* Initialize the rest of the hwfns */
 if (cdev->num_hwfns > 1) {
 void __iomem *p_regview, *p_doorbell;
- u8 __iomem *addr;
+ u64 db_phys_addr;
+ u32 offset;
 /* adjust bar offset for second engine */
- addr = cdev->regview +
- qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
- BAR_ID_0) / 2;
- p_regview = addr;
+ offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+ BAR_ID_0) / 2;
+ p_regview = cdev->regview + offset;
+
+ offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+ BAR_ID_1) / 2;
- addr = cdev->doorbells +
- qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
- BAR_ID_1) / 2;
- p_doorbell = addr;
+ p_doorbell = cdev->doorbells + offset;
+
+ db_phys_addr = cdev->db_phys_addr + offset;
 /* prepare second hw function */
 rc = qed_hw_prepare_single(&cdev->hwfns[1], p_regview,
- p_doorbell, personality);
+ p_doorbell, db_phys_addr,
+ personality);
 /* in case of error, need to free the previously
 * initialized hwfn 0.
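/* [Editor's note] A standalone sketch of the native-PPFID fallback in
 * qed_hw_get_ppfid_bitmap() above: when the MFW does not support the
 * GET_PPFID_BITMAP command, the bitmap degenerates to just the native
 * PPFID, whose index is derived differently for BB and AH. This is not
 * driver code, and the input values are made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint8_t native_ppfid_idx(bool is_bb, uint8_t rel_pf_id,
				uint8_t num_ports_in_engine)
{
	/* BB uses the PF id directly; AH divides by the engine port count */
	return is_bb ? rel_pf_id : rel_pf_id / num_ports_in_engine;
}

int main(void)
{
	uint8_t idx = native_ppfid_idx(false /* AH */, 3, 2); /* -> 1 */
	uint8_t bitmap = 0x0; /* as if the MFW query returned -EOPNOTSUPP */

	if (!bitmap)
		bitmap = 1u << idx; /* fall back to the native PPFID only */

	if (!(bitmap & (1u << idx)))
		bitmap = 1u << idx; /* a bitmap missing the native PPFID is reset */

	printf("native idx %u, ppfid bitmap 0x%02x\n",
	       (unsigned)idx, (unsigned)bitmap);
	return 0;
}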
@@ -3951,269 +4942,6 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) return 0; } -static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low, - u8 *p_filter) -{ - *p_high = p_filter[1] | (p_filter[0] << 8); - *p_low = p_filter[5] | (p_filter[4] << 8) | - (p_filter[3] << 16) | (p_filter[2] << 24); -} - -int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter) -{ - u32 high = 0, low = 0, en; - int i; - - if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits)) - return 0; - - qed_llh_mac_to_filter(&high, &low, p_filter); - - /* Find a free entry and utilize it */ - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - en = qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)); - if (en) - continue; - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32), low); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), high); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1); - break; - } - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { - DP_NOTICE(p_hwfn, - "Failed to find an empty LLH filter to utilize\n"); - return -EINVAL; - } - - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "mac: %pM is added at %d\n", - p_filter, i); - - return 0; -} - -void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter) -{ - u32 high = 0, low = 0; - int i; - - if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits)) - return; - - qed_llh_mac_to_filter(&high, &low, p_filter); - - /* Find the entry and clean it */ - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32)) != low) - continue; - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32)) != high) - continue; - - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), 0); - - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "mac: %pM is removed from %d\n", - p_filter, i); - break; - } - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) - DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n"); -} - -int -qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, enum qed_llh_port_filter_type_t type) -{ - u32 high = 0, low = 0, en; - int i; - - if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits)) - return 0; - - switch (type) { - case QED_LLH_FILTER_ETHERTYPE: - high = source_port_or_eth_type; - break; - case QED_LLH_FILTER_TCP_SRC_PORT: - case QED_LLH_FILTER_UDP_SRC_PORT: - low = source_port_or_eth_type << 16; - break; - case QED_LLH_FILTER_TCP_DEST_PORT: - case QED_LLH_FILTER_UDP_DEST_PORT: - low = dest_port; - break; - case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT: - case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT: - low = (source_port_or_eth_type << 16) | dest_port; - break; - default: - DP_NOTICE(p_hwfn, - "Non valid LLH protocol filter type %d\n", type); - return -EINVAL; - } - /* Find a free entry and utilize it */ - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - en = qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)); - if (en) 
- continue; - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32), low); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), high); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 1); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32), 1 << type); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1); - break; - } - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { - DP_NOTICE(p_hwfn, - "Failed to find an empty LLH filter to utilize\n"); - return -EINVAL; - } - switch (type) { - case QED_LLH_FILTER_ETHERTYPE: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "ETH type %x is added at %d\n", - source_port_or_eth_type, i); - break; - case QED_LLH_FILTER_TCP_SRC_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "TCP src port %x is added at %d\n", - source_port_or_eth_type, i); - break; - case QED_LLH_FILTER_UDP_SRC_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "UDP src port %x is added at %d\n", - source_port_or_eth_type, i); - break; - case QED_LLH_FILTER_TCP_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "TCP dst port %x is added at %d\n", dest_port, i); - break; - case QED_LLH_FILTER_UDP_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "UDP dst port %x is added at %d\n", dest_port, i); - break; - case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "TCP src/dst ports %x/%x are added at %d\n", - source_port_or_eth_type, dest_port, i); - break; - case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "UDP src/dst ports %x/%x are added at %d\n", - source_port_or_eth_type, dest_port, i); - break; - } - return 0; -} - -void -qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, - enum qed_llh_port_filter_type_t type) -{ - u32 high = 0, low = 0; - int i; - - if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits)) - return; - - switch (type) { - case QED_LLH_FILTER_ETHERTYPE: - high = source_port_or_eth_type; - break; - case QED_LLH_FILTER_TCP_SRC_PORT: - case QED_LLH_FILTER_UDP_SRC_PORT: - low = source_port_or_eth_type << 16; - break; - case QED_LLH_FILTER_TCP_DEST_PORT: - case QED_LLH_FILTER_UDP_DEST_PORT: - low = dest_port; - break; - case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT: - case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT: - low = (source_port_or_eth_type << 16) | dest_port; - break; - default: - DP_NOTICE(p_hwfn, - "Non valid LLH protocol filter type %d\n", type); - return; - } - - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - if (!qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32))) - continue; - if (!qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32))) - continue; - if (!(qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32)) & BIT(type))) - continue; - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32)) != low) - continue; - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32)) != high) - continue; - - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), 0); - 
break; - } - - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) - DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n"); -} - static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 hw_addr, void *p_eth_qzone, size_t eth_qzone_size, u8 timeset) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index e4b4e3b78e8a..47376d4d071f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -241,11 +241,17 @@ enum qed_dmae_address_type_t { #define QED_DMAE_FLAG_VF_SRC 0x00000002 #define QED_DMAE_FLAG_VF_DST 0x00000004 #define QED_DMAE_FLAG_COMPLETION_DST 0x00000008 +#define QED_DMAE_FLAG_PORT 0x00000010 +#define QED_DMAE_FLAG_PF_SRC 0x00000020 +#define QED_DMAE_FLAG_PF_DST 0x00000040 struct qed_dmae_params { u32 flags; /* consists of QED_DMAE_FLAG_* values */ u8 src_vfid; u8 dst_vfid; + u8 port_id; + u8 src_pfid; + u8 dst_pfid; }; /** @@ -257,7 +263,7 @@ struct qed_dmae_params { * @param source_addr * @param grc_addr (dmae_data_offset) * @param size_in_dwords - * @param flags (one of the flags defined above) + * @param p_params (default parameters will be used in case of NULL) */ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn, @@ -265,7 +271,7 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn, u64 source_addr, u32 grc_addr, u32 size_in_dwords, - u32 flags); + struct qed_dmae_params *p_params); /** * @brief qed_dmae_grc2host - Read data from dmae data offset @@ -275,11 +281,11 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn, * @param grc_addr (dmae_data_offset) * @param dest_addr * @param size_in_dwords - * @param flags - one of the flags defined above + * @param p_params (default parameters will be used in case of NULL) */ int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr, dma_addr_t dest_addr, u32 size_in_dwords, - u32 flags); + struct qed_dmae_params *p_params); /** * @brief qed_dmae_host2host - copy data from to source address @@ -290,7 +296,7 @@ int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, * @param source_addr * @param dest_addr * @param size_in_dwords - * @param params + * @param p_params (default parameters will be used in case of NULL) */ int qed_dmae_host2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -368,26 +374,66 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 *dst_id); /** - * @brief qed_llh_add_mac_filter - configures a MAC filter in llh + * @brief qed_llh_get_num_ppfid - Return the allocated number of LLH filter + * banks that are allocated to the PF. * - * @param p_hwfn - * @param p_ptt - * @param p_filter - MAC to add + * @param cdev + * + * @return u8 - Number of LLH filter banks */ -int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter); +u8 qed_llh_get_num_ppfid(struct qed_dev *cdev); + +enum qed_eng { + QED_ENG0, + QED_ENG1, + QED_BOTH_ENG, +}; /** - * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh + * @brief qed_llh_set_ppfid_affinity - Set the engine affinity for the given + * LLH filter bank. + * + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). 
+ * @param eng + * + * @return int + */ +int qed_llh_set_ppfid_affinity(struct qed_dev *cdev, + u8 ppfid, enum qed_eng eng); + +/** + * @brief qed_llh_set_roce_affinity - Set the RoCE engine affinity + * + * @param cdev + * @param eng + * + * @return int + */ +int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng); + +/** + * @brief qed_llh_add_mac_filter - Add a LLH MAC filter into the given filter + * bank. + * + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). + * @param mac_addr - MAC to add + */ +int qed_llh_add_mac_filter(struct qed_dev *cdev, + u8 ppfid, u8 mac_addr[ETH_ALEN]); + +/** + * @brief qed_llh_remove_mac_filter - Remove a LLH MAC filter from the given + * filter bank. * - * @param p_hwfn * @param p_ptt * @param p_filter - MAC to remove */ -void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter); +void qed_llh_remove_mac_filter(struct qed_dev *cdev, + u8 ppfid, u8 mac_addr[ETH_ALEN]); -enum qed_llh_port_filter_type_t { +enum qed_llh_prot_filter_type_t { QED_LLH_FILTER_ETHERTYPE, QED_LLH_FILTER_TCP_SRC_PORT, QED_LLH_FILTER_TCP_DEST_PORT, @@ -398,36 +444,37 @@ enum qed_llh_port_filter_type_t { }; /** - * @brief qed_llh_add_protocol_filter - configures a protocol filter in llh + * @brief qed_llh_add_protocol_filter - Add a LLH protocol filter into the + * given filter bank. * - * @param p_hwfn - * @param p_ptt + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). + * @param type - type of filters and comparing * @param source_port_or_eth_type - source port or ethertype to add * @param dest_port - destination port to add * @param type - type of filters and comparing */ int -qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, - enum qed_llh_port_filter_type_t type); +qed_llh_add_protocol_filter(struct qed_dev *cdev, + u8 ppfid, + enum qed_llh_prot_filter_type_t type, + u16 source_port_or_eth_type, u16 dest_port); /** - * @brief qed_llh_remove_protocol_filter - remove a protocol filter in llh + * @brief qed_llh_remove_protocol_filter - Remove a LLH protocol filter from + * the given filter bank. * - * @param p_hwfn - * @param p_ptt + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). 
+ * @param type - type of filters and comparing * @param source_port_or_eth_type - source port or ethertype to add * @param dest_port - destination port to add - * @param type - type of filters and comparing */ void -qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, - enum qed_llh_port_filter_type_t type); +qed_llh_remove_protocol_filter(struct qed_dev *cdev, + u8 ppfid, + enum qed_llh_prot_filter_type_t type, + u16 source_port_or_eth_type, u16 dest_port); /** * *@brief Cleanup of previous driver remains prior to load diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c index 46dc93d3b9b5..de31a382f58e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c @@ -745,7 +745,7 @@ struct qed_hash_fcoe_con { static int qed_fill_fcoe_dev_info(struct qed_dev *cdev, struct qed_dev_fcoe_info *info) { - struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev); int rc; memset(info, 0, sizeof(*info)); @@ -806,15 +806,15 @@ static int qed_fcoe_stop(struct qed_dev *cdev) return -EINVAL; } - p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + p_ptt = qed_ptt_acquire(QED_AFFIN_HWFN(cdev)); if (!p_ptt) return -EAGAIN; /* Stop the fcoe */ - rc = qed_sp_fcoe_func_stop(QED_LEADING_HWFN(cdev), p_ptt, + rc = qed_sp_fcoe_func_stop(QED_AFFIN_HWFN(cdev), p_ptt, QED_SPQ_MODE_EBLOCK, NULL); cdev->flags &= ~QED_FLAG_STORAGE_STARTED; - qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + qed_ptt_release(QED_AFFIN_HWFN(cdev), p_ptt); return rc; } @@ -828,8 +828,8 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks) return 0; } - rc = qed_sp_fcoe_func_start(QED_LEADING_HWFN(cdev), - QED_SPQ_MODE_EBLOCK, NULL); + rc = qed_sp_fcoe_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK, + NULL); if (rc) { DP_NOTICE(cdev, "Failed to start fcoe\n"); return rc; @@ -849,7 +849,7 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks) return -ENOMEM; } - rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), tid_info); + rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info); if (rc) { DP_NOTICE(cdev, "Failed to gather task information\n"); qed_fcoe_stop(cdev); @@ -884,7 +884,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev, } /* Acquire the connection */ - rc = qed_fcoe_acquire_connection(QED_LEADING_HWFN(cdev), NULL, + rc = qed_fcoe_acquire_connection(QED_AFFIN_HWFN(cdev), NULL, &hash_con->con); if (rc) { DP_NOTICE(cdev, "Failed to acquire Connection\n"); @@ -898,7 +898,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev, hash_add(cdev->connections, &hash_con->node, *handle); if (p_doorbell) - *p_doorbell = qed_fcoe_get_db_addr(QED_LEADING_HWFN(cdev), + *p_doorbell = qed_fcoe_get_db_addr(QED_AFFIN_HWFN(cdev), *handle); return 0; @@ -916,7 +916,7 @@ static int qed_fcoe_release_conn(struct qed_dev *cdev, u32 handle) } hlist_del(&hash_con->node); - qed_fcoe_release_connection(QED_LEADING_HWFN(cdev), hash_con->con); + qed_fcoe_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con); kfree(hash_con); return 0; @@ -971,7 +971,7 @@ static int qed_fcoe_offload_conn(struct qed_dev *cdev, con->d_id.addr_mid = conn_info->d_id.addr_mid; con->d_id.addr_lo = conn_info->d_id.addr_lo; - return qed_sp_fcoe_conn_offload(QED_LEADING_HWFN(cdev), con, + return qed_sp_fcoe_conn_offload(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -992,13 +992,13 @@ static 
int qed_fcoe_destroy_conn(struct qed_dev *cdev, con = hash_con->con; con->terminate_params = terminate_params; - return qed_sp_fcoe_conn_destroy(QED_LEADING_HWFN(cdev), con, + return qed_sp_fcoe_conn_destroy(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats) { - return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats); + return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats); } void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 37edaa847512..e054f6c69e3a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12612,8 +12612,10 @@ struct public_drv_mb { #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 -#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000 +#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000 #define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000 +#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000 +#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000 #define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F #define RESOURCE_CMD_REQ_RESC_SHIFT 0 @@ -12802,6 +12804,18 @@ struct public_drv_mb { #define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR (1 << 0) +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_MASK 0x00000001 +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_SHIFT 0 +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_MASK 0x00000002 +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_SHIFT 1 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_MASK 0x00000004 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_SHIFT 2 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_MASK 0x00000008 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_SHIFT 3 + +#define FW_MB_PARAM_PPFID_BITMAP_MASK 0xFF +#define FW_MB_PARAM_PPFID_BITMAP_SHIFT 0 + u32 drv_pulse_mb; #define DRV_PULSE_SEQ_MASK 0x00007fff #define DRV_PULSE_SYSTEM_TIME_MASK 0xffff0000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 72ec1c6bdf70..a4de9e3ef72c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -392,11 +392,15 @@ u32 qed_vfid_to_concrete(struct qed_hwfn *p_hwfn, u8 vfid) } /* DMAE */ +#define QED_DMAE_FLAGS_IS_SET(params, flag) \ + ((params) != NULL && ((params)->flags & QED_DMAE_FLAG_##flag)) + static void qed_dmae_opcode(struct qed_hwfn *p_hwfn, const u8 is_src_type_grc, const u8 is_dst_type_grc, struct qed_dmae_params *p_params) { + u8 src_pfid, dst_pfid, port_id; u16 opcode_b = 0; u32 opcode = 0; @@ -407,14 +411,18 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn, opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC : DMAE_CMD_SRC_MASK_PCIE) << DMAE_CMD_SRC_SHIFT; - opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_SRC_PF_ID_MASK) << + src_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_SRC) ? + p_params->src_pfid : p_hwfn->rel_pf_id; + opcode |= ((src_pfid & DMAE_CMD_SRC_PF_ID_MASK) << DMAE_CMD_SRC_PF_ID_SHIFT); /* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */ opcode |= (is_dst_type_grc ? DMAE_CMD_DST_MASK_GRC : DMAE_CMD_DST_MASK_PCIE) << DMAE_CMD_DST_SHIFT; - opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_DST_PF_ID_MASK) << + dst_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_DST) ? 
+ p_params->dst_pfid : p_hwfn->rel_pf_id;
+ opcode |= ((dst_pfid & DMAE_CMD_DST_PF_ID_MASK) <<
 DMAE_CMD_DST_PF_ID_SHIFT);
 /* Whether to write a completion word to the completion destination:
@@ -425,12 +433,14 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
 DMAE_CMD_SRC_ADDR_RESET_SHIFT);
- if (p_params->flags & QED_DMAE_FLAG_COMPLETION_DST)
+ if (QED_DMAE_FLAGS_IS_SET(p_params, COMPLETION_DST))
 opcode |= (1 << DMAE_CMD_COMP_FUNC_SHIFT);
 opcode |= (DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT);
- opcode |= ((p_hwfn->port_id) << DMAE_CMD_PORT_ID_SHIFT);
+ port_id = (QED_DMAE_FLAGS_IS_SET(p_params, PORT)) ?
+ p_params->port_id : p_hwfn->port_id;
+ opcode |= (port_id << DMAE_CMD_PORT_ID_SHIFT);
 /* reset source address in next go */
 opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
@@ -441,7 +451,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 DMAE_CMD_DST_ADDR_RESET_SHIFT);
 /* SRC/DST VFID: all 1's - pf, otherwise VF id */
- if (p_params->flags & QED_DMAE_FLAG_VF_SRC) {
+ if (QED_DMAE_FLAGS_IS_SET(p_params, VF_SRC)) {
 opcode |= 1 << DMAE_CMD_SRC_VF_ID_VALID_SHIFT;
 opcode_b |= p_params->src_vfid << DMAE_CMD_SRC_VF_ID_SHIFT;
 } else {
@@ -449,7 +459,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 DMAE_CMD_SRC_VF_ID_SHIFT;
 }
- if (p_params->flags & QED_DMAE_FLAG_VF_DST) {
+ if (QED_DMAE_FLAGS_IS_SET(p_params, VF_DST)) {
 opcode |= 1 << DMAE_CMD_DST_VF_ID_VALID_SHIFT;
 opcode_b |= p_params->dst_vfid << DMAE_CMD_DST_VF_ID_SHIFT;
 } else {
@@ -733,7 +743,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 for (i = 0; i <= cnt_split; i++) {
 offset = length_limit * i;
- if (!(p_params->flags & QED_DMAE_FLAG_RW_REPL_SRC)) {
+ if (!QED_DMAE_FLAGS_IS_SET(p_params, RW_REPL_SRC)) {
 if (src_type == QED_DMAE_ADDRESS_GRC)
 src_addr_split = src_addr + offset;
 else
@@ -771,14 +781,12 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 struct qed_ptt *p_ptt,
- u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
+ u64 source_addr, u32 grc_addr, u32 size_in_dwords,
+ struct qed_dmae_params *p_params)
 {
 u32 grc_addr_in_dw = grc_addr / sizeof(u32);
- struct qed_dmae_params params;
 int rc;
- memset(&params, 0, sizeof(struct qed_dmae_params));
- params.flags = flags;
 mutex_lock(&p_hwfn->dmae_info.mutex);
@@ -786,7 +794,7 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 grc_addr_in_dw,
 QED_DMAE_ADDRESS_HOST_VIRT,
 QED_DMAE_ADDRESS_GRC,
- size_in_dwords, &params);
+ size_in_dwords, p_params);
 mutex_unlock(&p_hwfn->dmae_info.mutex);
@@ -796,21 +804,19 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr,
- dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+ dma_addr_t dest_addr, u32 size_in_dwords,
+ struct qed_dmae_params *p_params)
 {
 u32 grc_addr_in_dw = grc_addr / sizeof(u32);
- struct qed_dmae_params params;
 int rc;
- memset(&params, 0, sizeof(struct qed_dmae_params));
- params.flags = flags;
 mutex_lock(&p_hwfn->dmae_info.mutex);
 rc = qed_dmae_execute_command(p_hwfn, p_ptt, grc_addr_in_dw,
 dest_addr, QED_DMAE_ADDRESS_GRC,
 QED_DMAE_ADDRESS_HOST_VIRT,
- size_in_dwords, &params);
+ size_in_dwords, p_params);
 mutex_unlock(&p_hwfn->dmae_info.mutex);
@@ -842,7 +848,6 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 struct qed_ptt *p_ptt, const char *phase)
 {
 u32 size = PAGE_SIZE / 2, val;
- struct qed_dmae_params params;
 int rc = 0;
 dma_addr_t p_phys;
 void *p_virt;
@@ -875,9 +880,8 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 (u64)p_phys, p_virt, (u64)(p_phys + size), (u8 *)p_virt + size, size);
- memset(&params, 0, sizeof(params));
 rc = qed_dmae_host2host(p_hwfn, p_ptt, p_phys, p_phys + size,
- size / 4 /* size_in_dwords */, &params);
+ size / 4, NULL);
 if (rc) {
 DP_NOTICE(p_hwfn,
 "DMAE sanity [%s]: qed_dmae_host2host() failed. rc = %d.\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 34193c2f1699..a868d7f88601 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -131,7 +131,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn,
 rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 (uintptr_t)(p_init_val + i),
- addr + (i << 2), segment, 0);
+ addr + (i << 2), segment, NULL);
 if (rc)
 return rc;
@@ -194,7 +194,7 @@ static int qed_init_array_dmae(struct qed_hwfn *p_hwfn,
 } else {
 rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 (uintptr_t)(buf + dmae_data_offset),
- addr, size, 0);
+ addr, size, NULL);
 }
 return rc;
@@ -205,6 +205,7 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 u32 addr, u32 fill, u32 fill_count)
 {
 static u32 zero_buffer[DMAE_MAX_RW_SIZE];
+ struct qed_dmae_params params = {};
 memset(zero_buffer, 0, sizeof(u32) * DMAE_MAX_RW_SIZE);
@@ -214,10 +215,10 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 * 3. p_hwfn->temp_data,
 * 4. fill_count
 */
-
+ params.flags = QED_DMAE_FLAG_RW_REPL_SRC;
 return qed_dmae_host2grc(p_hwfn, p_ptt,
 (uintptr_t)(&zero_buffer[0]),
- addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
+ addr, fill_count, &params);
 }
 static void qed_init_fill(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index fdfedbc8e431..4e8118a08654 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1508,10 +1508,10 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 qed_dmae_host2grc(p_hwfn, p_ptt,
 (u64)(uintptr_t)&phys_addr,
 CAU_REG_SB_ADDR_MEMORY +
- igu_sb_id * sizeof(u64), 2, 0);
+ igu_sb_id * sizeof(u64), 2, NULL);
 qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry,
 CAU_REG_SB_VAR_MEMORY +
- igu_sb_id * sizeof(u64), 2, 0);
+ igu_sb_id * sizeof(u64), 2, NULL);
 } else {
 /* Initialize Status Block Address */
 STORE_RT_REG_AGG(p_hwfn,
@@ -2362,7 +2362,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 rc = qed_dmae_grc2host(p_hwfn, p_ptt,
 CAU_REG_SB_VAR_MEMORY +
 sb_id * sizeof(u64),
- (u64)(uintptr_t)&sb_entry, 2, 0);
+ (u64)(uintptr_t)&sb_entry, 2, NULL);
 if (rc) {
 DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 return rc;
@@ -2376,7 +2376,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 (u64)(uintptr_t)&sb_entry,
 CAU_REG_SB_VAR_MEMORY +
- sb_id * sizeof(u64), 2, 0);
+ sb_id * sizeof(u64), 2, NULL);
 if (rc) {
 DP_ERR(p_hwfn, "dmae_host2grc failed %d\n", rc);
 return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4f8a685d1a55..5585c18053ec 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -1082,7 +1082,7 @@ struct qed_hash_iscsi_con {
 static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
 struct qed_dev_iscsi_info *info)
 {
- struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
 int rc;
@@ -1141,8 +1141,8 @@ static int qed_iscsi_stop(struct qed_dev *cdev)
 }
 /* Stop the iscsi */
-
rc = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev), - QED_SPQ_MODE_EBLOCK, NULL); + rc = qed_sp_iscsi_func_stop(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK, + NULL); cdev->flags &= ~QED_FLAG_STORAGE_STARTED; return rc; @@ -1161,9 +1161,8 @@ static int qed_iscsi_start(struct qed_dev *cdev, return 0; } - rc = qed_sp_iscsi_func_start(QED_LEADING_HWFN(cdev), - QED_SPQ_MODE_EBLOCK, NULL, event_context, - async_event_cb); + rc = qed_sp_iscsi_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK, + NULL, event_context, async_event_cb); if (rc) { DP_NOTICE(cdev, "Failed to start iscsi\n"); return rc; @@ -1182,8 +1181,7 @@ static int qed_iscsi_start(struct qed_dev *cdev, return -ENOMEM; } - rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), - tid_info); + rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info); if (rc) { DP_NOTICE(cdev, "Failed to gather task information\n"); qed_iscsi_stop(cdev); @@ -1215,7 +1213,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev, return -ENOMEM; /* Acquire the connection */ - rc = qed_iscsi_acquire_connection(QED_LEADING_HWFN(cdev), NULL, + rc = qed_iscsi_acquire_connection(QED_AFFIN_HWFN(cdev), NULL, &hash_con->con); if (rc) { DP_NOTICE(cdev, "Failed to acquire Connection\n"); @@ -1229,7 +1227,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev, hash_add(cdev->connections, &hash_con->node, *handle); if (p_doorbell) - *p_doorbell = qed_iscsi_get_db_addr(QED_LEADING_HWFN(cdev), + *p_doorbell = qed_iscsi_get_db_addr(QED_AFFIN_HWFN(cdev), *handle); return 0; @@ -1247,7 +1245,7 @@ static int qed_iscsi_release_conn(struct qed_dev *cdev, u32 handle) } hlist_del(&hash_con->node); - qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), hash_con->con); + qed_iscsi_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con); kfree(hash_con); return 0; @@ -1324,7 +1322,7 @@ static int qed_iscsi_offload_conn(struct qed_dev *cdev, /* Set default values on other connection fields */ con->offl_flags = 0x1; - return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con, + return qed_sp_iscsi_conn_offload(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -1351,7 +1349,7 @@ static int qed_iscsi_update_conn(struct qed_dev *cdev, con->first_seq_length = conn_info->first_seq_length; con->exp_stat_sn = conn_info->exp_stat_sn; - return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), con, + return qed_sp_iscsi_conn_update(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -1366,8 +1364,7 @@ static int qed_iscsi_clear_conn_sq(struct qed_dev *cdev, u32 handle) return -EINVAL; } - return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev), - hash_con->con, + return qed_sp_iscsi_conn_clear_sq(QED_AFFIN_HWFN(cdev), hash_con->con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -1385,14 +1382,13 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev, hash_con->con->abortive_dsconnect = abrt_conn; - return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev), - hash_con->con, + return qed_sp_iscsi_conn_terminate(QED_AFFIN_HWFN(cdev), hash_con->con, QED_SPQ_MODE_EBLOCK, NULL); } static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats) { - return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats); + return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats); } static int qed_iscsi_change_mac(struct qed_dev *cdev, @@ -1407,8 +1403,7 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev, return -EINVAL; } - return qed_sp_iscsi_mac_update(QED_LEADING_HWFN(cdev), - hash_con->con, + return qed_sp_iscsi_mac_update(QED_AFFIN_HWFN(cdev), hash_con->con, 
QED_SPQ_MODE_EBLOCK, NULL);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ded556b7bab5..f380fae8799d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -63,7 +63,12 @@ struct mpa_v2_hdr {
 #define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
 #define QED_IWARP_INVALID_TCP_CID 0xffffffff
-#define QED_IWARP_RCV_WND_SIZE_DEF (256 * 1024)
+
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_2P (200 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_4P (100 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_2P (150 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_4P (90 * 1024)
+
 #define QED_IWARP_RCV_WND_SIZE_MIN (0xffff)
 #define TIMESTAMP_HEADER_SIZE (12)
 #define QED_IWARP_MAX_FIN_RT_DEFAULT (2)
@@ -532,7 +537,8 @@ int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 /* Make sure ep is closed before returning and freeing memory. */
 if (ep) {
- while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+ while (READ_ONCE(ep->state) != QED_IWARP_EP_CLOSED &&
+ wait_count++ < 200)
 msleep(100);
 if (ep->state != QED_IWARP_EP_CLOSED)
@@ -1022,8 +1028,6 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
 params.ep_context = ep;
- ep->state = QED_IWARP_EP_CLOSED;
 switch (fw_return_code) {
 case RDMA_RETURN_OK:
 ep->qp->max_rd_atomic_req = ep->cm_info.ord;
@@ -1083,6 +1087,10 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
 break;
 }
+ if (fw_return_code != RDMA_RETURN_OK)
+ /* paired with READ_ONCE in destroy_qp */
+ smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
+
 ep->event_cb(ep->cb_context, &params);
 /* on passive side, if there is no associated QP (REJECT) we need to
@@ -2528,7 +2536,7 @@ qed_iwarp_ll2_slowpath(void *cxt,
 memset(fpdu, 0, sizeof(*fpdu));
 }
-static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn)
 {
 struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
 int rc = 0;
@@ -2563,8 +2571,9 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
 }
- qed_llh_remove_mac_filter(p_hwfn,
- p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
+ qed_llh_remove_mac_filter(p_hwfn->cdev, 0,
+ p_hwfn->p_rdma_info->iwarp.mac_addr);
+
 return rc;
 }
@@ -2609,7 +2618,7 @@ qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
 static int
 qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 struct qed_rdma_start_in_params *params,
- struct qed_ptt *p_ptt)
+ u32 rcv_wnd_size)
 {
 struct qed_iwarp_info *iwarp_info;
 struct qed_ll2_acquire_data data;
@@ -2628,7 +2637,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 ether_addr_copy(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr);
- rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+ rc = qed_llh_add_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 if (rc)
 return rc;
@@ -2637,6 +2646,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt;
 cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt;
 cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt;
+ cbs.slowpath_cb = NULL;
 cbs.cookie = p_hwfn;
 memset(&data, 0, sizeof(data));
@@ -2653,7 +2663,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 rc = qed_ll2_acquire_connection(p_hwfn, &data);
 if (rc) {
 DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
- qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+ qed_llh_remove_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 return rc;
 }
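/* [Editor's note] The hunk below scales the number of OOO Rx buffers with
 * the receive window, which is now chosen per chip and port count instead
 * of the old fixed 256KB default. A small standalone sketch of that sizing
 * math; QED_IWARP_MAX_OOO and the clamp value are assumptions made only
 * for illustration, not values confirmed by this patch.
 */
#include <stdint.h>
#include <stdio.h>

#define QED_IWARP_MAX_OOO 16                /* assumed for illustration */
#define QED_IWARP_LL2_OOO_MAX_RX_SIZE 16384 /* assumed clamp */

int main(void)
{
	uint32_t rcv_wnd_size = 100 * 1024; /* e.g. BB with 4 ports */
	uint32_t max_mtu = 9000;
	uint32_t n_ooo_bufs = (QED_IWARP_MAX_OOO * rcv_wnd_size) / max_mtu;

	if (n_ooo_bufs > QED_IWARP_LL2_OOO_MAX_RX_SIZE)
		n_ooo_bufs = QED_IWARP_LL2_OOO_MAX_RX_SIZE;

	/* 16 * 102400 / 9000 = 182 buffers for this window/MTU pair */
	printf("n_ooo_bufs = %u\n", (unsigned)n_ooo_bufs);
	return 0;
}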
@@ -2675,7 +2685,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, data.input.conn_type = QED_LL2_TYPE_OOO; data.input.mtu = params->max_mtu; - n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) / + n_ooo_bufs = (QED_IWARP_MAX_OOO * rcv_wnd_size) / iwarp_info->max_mtu; n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE); @@ -2708,6 +2718,8 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, data.input.rx_num_desc = n_ooo_bufs * 2; data.input.tx_num_desc = data.input.rx_num_desc; data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU; + data.input.tx_tc = PKT_LB_TC; + data.input.tx_dest = QED_LL2_TX_DEST_LB; data.p_connection_handle = &iwarp_info->ll2_mpa_handle; data.input.secondary_queue = true; data.cbs = &cbs; @@ -2757,21 +2769,35 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, &iwarp_info->mpa_buf_list); return rc; err: - qed_iwarp_ll2_stop(p_hwfn, p_ptt); + qed_iwarp_ll2_stop(p_hwfn); return rc; } -int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, +static struct { + u32 two_ports; + u32 four_ports; +} qed_iwarp_rcv_wnd_size[MAX_CHIP_IDS] = { + {QED_IWARP_RCV_WND_SIZE_DEF_BB_2P, QED_IWARP_RCV_WND_SIZE_DEF_BB_4P}, + {QED_IWARP_RCV_WND_SIZE_DEF_AH_2P, QED_IWARP_RCV_WND_SIZE_DEF_AH_4P} +}; + +int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_rdma_start_in_params *params) { + struct qed_dev *cdev = p_hwfn->cdev; struct qed_iwarp_info *iwarp_info; + enum chip_ids chip_id; u32 rcv_wnd_size; iwarp_info = &p_hwfn->p_rdma_info->iwarp; iwarp_info->tcp_flags = QED_IWARP_TS_EN; - rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF; + + chip_id = QED_IS_BB(cdev) ? CHIP_BB : CHIP_K2; + rcv_wnd_size = (qed_device_num_ports(cdev) == 4) ? + qed_iwarp_rcv_wnd_size[chip_id].four_ports : + qed_iwarp_rcv_wnd_size[chip_id].two_ports; /* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */ iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) - @@ -2794,10 +2820,10 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, qed_iwarp_async_event); qed_ooo_setup(p_hwfn); - return qed_iwarp_ll2_start(p_hwfn, params, p_ptt); + return qed_iwarp_ll2_start(p_hwfn, params, rcv_wnd_size); } -int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +int qed_iwarp_stop(struct qed_hwfn *p_hwfn) { int rc; @@ -2808,7 +2834,7 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP); - return qed_iwarp_ll2_stop(p_hwfn, p_ptt); + return qed_iwarp_ll2_stop(p_hwfn); } static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn, @@ -2825,7 +2851,9 @@ static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn, params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ? 
0 : -ECONNRESET; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); list_del(&ep->list_entry); spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); @@ -2914,7 +2942,8 @@ qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn, params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE; params.ep_context = ep; params.cm_info = &ep->cm_info; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); switch (fw_return_code) { case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET: diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 7ac959038324..c1b2057d23b8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -183,13 +183,13 @@ struct qed_iwarp_listener { int qed_iwarp_alloc(struct qed_hwfn *p_hwfn); -int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, +int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_rdma_start_in_params *params); void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, struct iwarp_init_func_ramrod_data *p_ramrod); -int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +int qed_iwarp_stop(struct qed_hwfn *p_hwfn); void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 57641728df69..9f36e7948222 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2111,7 +2111,7 @@ int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn, rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY + p_cid->sb_igu_id * sizeof(u64), - (u64)(uintptr_t)&sb_entry, 2, 0); + (u64)(uintptr_t)&sb_entry, 2, NULL); if (rc) { DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc); return rc; @@ -2144,7 +2144,7 @@ int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn, rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY + p_cid->sb_igu_id * sizeof(u64), - (u64)(uintptr_t)&sb_entry, 2, 0); + (u64)(uintptr_t)&sb_entry, 2, NULL); if (rc) { DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc); return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index b5f419b71287..19a1a58d60f8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -239,9 +239,8 @@ out_post1: buffer->phys_addr = new_phys_addr; out_post: - rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle, - buffer->phys_addr, 0, buffer, 1); - + rc = qed_ll2_post_rx_buffer(p_hwfn, cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); if (rc) qed_ll2_dealloc_buffer(cdev, buffer); } @@ -926,16 +925,15 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) return 0; } -static void qed_ll2_stop_ooo(struct qed_dev *cdev) +static void qed_ll2_stop_ooo(struct qed_hwfn *p_hwfn) { - struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); - u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; + u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; - DP_VERBOSE(cdev, QED_MSG_STORAGE, "Stopping LL2 OOO queue [%02x]\n", - *handle); + DP_VERBOSE(p_hwfn, (QED_MSG_STORAGE | QED_MSG_LL2), + "Stopping LL2 OOO queue [%02x]\n", *handle); - qed_ll2_terminate_connection(hwfn, *handle); - qed_ll2_release_connection(hwfn, *handle); + 
qed_ll2_terminate_connection(p_hwfn, *handle); + qed_ll2_release_connection(p_hwfn, *handle); *handle = QED_LL2_UNUSED_HANDLE; } @@ -1574,12 +1572,12 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) { if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) - qed_llh_add_protocol_filter(p_hwfn, p_ptt, - ETH_P_FCOE, 0, - QED_LLH_FILTER_ETHERTYPE); - qed_llh_add_protocol_filter(p_hwfn, p_ptt, - ETH_P_FIP, 0, - QED_LLH_FILTER_ETHERTYPE); + qed_llh_add_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FCOE, 0); + qed_llh_add_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FIP, 0); } out: @@ -1980,12 +1978,12 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle) if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) { if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) - qed_llh_remove_protocol_filter(p_hwfn, p_ptt, - ETH_P_FCOE, 0, - QED_LLH_FILTER_ETHERTYPE); - qed_llh_remove_protocol_filter(p_hwfn, p_ptt, - ETH_P_FIP, 0, - QED_LLH_FILTER_ETHERTYPE); + qed_llh_remove_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FCOE, 0); + qed_llh_remove_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FIP, 0); } out: @@ -2086,12 +2084,12 @@ static void _qed_ll2_get_port_stats(struct qed_hwfn *p_hwfn, TSTORM_LL2_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)), sizeof(port_stats)); - p_stats->gsi_invalid_hdr = HILO_64_REGPAIR(port_stats.gsi_invalid_hdr); - p_stats->gsi_invalid_pkt_length = + p_stats->gsi_invalid_hdr += HILO_64_REGPAIR(port_stats.gsi_invalid_hdr); + p_stats->gsi_invalid_pkt_length += HILO_64_REGPAIR(port_stats.gsi_invalid_pkt_length); - p_stats->gsi_unsupported_pkt_typ = + p_stats->gsi_unsupported_pkt_typ += HILO_64_REGPAIR(port_stats.gsi_unsupported_pkt_typ); - p_stats->gsi_crcchksm_error = + p_stats->gsi_crcchksm_error += HILO_64_REGPAIR(port_stats.gsi_crcchksm_error); } @@ -2109,9 +2107,9 @@ static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn, CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid); qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats)); - p_stats->packet_too_big_discard = + p_stats->packet_too_big_discard += HILO_64_REGPAIR(tstats.packet_too_big_discard); - p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard); + p_stats->no_buff_discard += HILO_64_REGPAIR(tstats.no_buff_discard); } static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn, @@ -2128,12 +2126,12 @@ static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn, CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid); qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats)); - p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes); - p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes); - p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes); - p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts); - p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts); - p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts); + p_stats->rcv_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + p_stats->rcv_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + p_stats->rcv_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + p_stats->rcv_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + p_stats->rcv_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + p_stats->rcv_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts); } static void _qed_ll2_get_pstats(struct 
qed_hwfn *p_hwfn, @@ -2150,23 +2148,21 @@ static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn, CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id); qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats)); - p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes); - p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes); - p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes); - p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts); - p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts); - p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts); + p_stats->sent_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes); + p_stats->sent_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes); + p_stats->sent_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes); + p_stats->sent_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts); + p_stats->sent_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts); + p_stats->sent_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts); } -int qed_ll2_get_stats(void *cxt, - u8 connection_handle, struct qed_ll2_stats *p_stats) +static int __qed_ll2_get_stats(void *cxt, u8 connection_handle, + struct qed_ll2_stats *p_stats) { struct qed_hwfn *p_hwfn = cxt; struct qed_ll2_info *p_ll2_conn = NULL; struct qed_ptt *p_ptt; - memset(p_stats, 0, sizeof(*p_stats)); - if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) || !p_hwfn->p_ll2_info) return -EINVAL; @@ -2181,15 +2177,26 @@ int qed_ll2_get_stats(void *cxt, if (p_ll2_conn->input.gsi_enable) _qed_ll2_get_port_stats(p_hwfn, p_ptt, p_stats); + _qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + _qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + if (p_ll2_conn->tx_stats_en) _qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); qed_ptt_release(p_hwfn, p_ptt); + return 0; } +int qed_ll2_get_stats(void *cxt, + u8 connection_handle, struct qed_ll2_stats *p_stats) +{ + memset(p_stats, 0, sizeof(*p_stats)); + return __qed_ll2_get_stats(cxt, connection_handle, p_stats); +} + static void qed_ll2b_release_rx_packet(void *cxt, u8 connection_handle, void *cookie, @@ -2216,7 +2223,7 @@ struct qed_ll2_cbs ll2_cbs = { .tx_release_cb = &qed_ll2b_complete_tx_packet, }; -static void qed_ll2_set_conn_data(struct qed_dev *cdev, +static void qed_ll2_set_conn_data(struct qed_hwfn *p_hwfn, struct qed_ll2_acquire_data *data, struct qed_ll2_params *params, enum qed_ll2_conn_type conn_type, @@ -2232,7 +2239,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev, data->input.tx_num_desc = QED_LL2_TX_SIZE; data->p_connection_handle = handle; data->cbs = &ll2_cbs; - ll2_cbs.cookie = QED_LEADING_HWFN(cdev); + ll2_cbs.cookie = p_hwfn; if (lb) { data->input.tx_tc = PKT_LB_TC; @@ -2243,74 +2250,102 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev, } } -static int qed_ll2_start_ooo(struct qed_dev *cdev, +static int qed_ll2_start_ooo(struct qed_hwfn *p_hwfn, struct qed_ll2_params *params) { - struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); - u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; + u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; struct qed_ll2_acquire_data data; int rc; - qed_ll2_set_conn_data(cdev, &data, params, + qed_ll2_set_conn_data(p_hwfn, &data, params, QED_LL2_TYPE_OOO, handle, true); - rc = qed_ll2_acquire_connection(hwfn, &data); + rc = qed_ll2_acquire_connection(p_hwfn, &data); if (rc) { - DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n"); + DP_INFO(p_hwfn, 
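/*
 * The switch from '=' to '+=' in the stats helpers above lets one
 * structure accumulate counters across engines: the caller zeroes it
 * once (the new qed_ll2_get_stats() wrapper) and each per-engine pass
 * through __qed_ll2_get_stats() adds its contribution. HILO_64_REGPAIR
 * combines a {hi, lo} register pair into one 64-bit value; a sketch of
 * both ideas with made-up register contents:
 */
#include <stdint.h>
#include <stdio.h>

struct regpair { uint32_t lo, hi; };

static uint64_t hilo64(struct regpair r)
{
        return ((uint64_t)r.hi << 32) | r.lo;
}

int main(void)
{
        struct regpair eng0 = { .lo = 100, .hi = 0 };
        struct regpair eng1 = { .lo = 23,  .hi = 0 };
        uint64_t rcv_ucast_pkts = 0;    /* zeroed once by the wrapper */

        rcv_ucast_pkts += hilo64(eng0); /* engine 0 pass */
        rcv_ucast_pkts += hilo64(eng1); /* engine 1 pass */
        printf("%llu\n", (unsigned long long)rcv_ucast_pkts);
        return 0;
}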
"Failed to acquire LL2 OOO connection\n"); goto out; } - rc = qed_ll2_establish_connection(hwfn, *handle); + rc = qed_ll2_establish_connection(p_hwfn, *handle); if (rc) { - DP_INFO(cdev, "Failed to establist LL2 OOO connection\n"); + DP_INFO(p_hwfn, "Failed to establish LL2 OOO connection\n"); goto fail; } return 0; fail: - qed_ll2_release_connection(hwfn, *handle); + qed_ll2_release_connection(p_hwfn, *handle); out: *handle = QED_LL2_UNUSED_HANDLE; return rc; } -static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) +static bool qed_ll2_is_storage_eng1(struct qed_dev *cdev) { - struct qed_ll2_buffer *buffer, *tmp_buffer; - enum qed_ll2_conn_type conn_type; - struct qed_ll2_acquire_data data; - struct qed_ptt *p_ptt; - int rc, i; + return (QED_IS_FCOE_PERSONALITY(QED_LEADING_HWFN(cdev)) || + QED_IS_ISCSI_PERSONALITY(QED_LEADING_HWFN(cdev))) && + (QED_AFFIN_HWFN(cdev) != QED_LEADING_HWFN(cdev)); +} +static int __qed_ll2_stop(struct qed_hwfn *p_hwfn) +{ + struct qed_dev *cdev = p_hwfn->cdev; + int rc; - /* Initialize LL2 locks & lists */ - INIT_LIST_HEAD(&cdev->ll2->list); - spin_lock_init(&cdev->ll2->lock); - cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + - L1_CACHE_BYTES + params->mtu; + rc = qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle); + if (rc) + DP_INFO(cdev, "Failed to terminate LL2 connection\n"); - /*Allocate memory for LL2 */ - DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n", - cdev->ll2->rx_size); - for (i = 0; i < QED_LL2_RX_SIZE; i++) { - buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); - if (!buffer) { - DP_INFO(cdev, "Failed to allocate LL2 buffers\n"); - goto fail; - } + qed_ll2_release_connection(p_hwfn, cdev->ll2->handle); - rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data, - &buffer->phys_addr); - if (rc) { - kfree(buffer); - goto fail; - } + return rc; +} - list_add_tail(&buffer->list, &cdev->ll2->list); +static int qed_ll2_stop(struct qed_dev *cdev) +{ + bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); + int rc = 0, rc2 = 0; + + if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE) + return 0; + + qed_llh_remove_mac_filter(cdev, 0, cdev->ll2_mac_address); + eth_zero_addr(cdev->ll2_mac_address); + + if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) + qed_ll2_stop_ooo(p_hwfn); + + /* In CMT mode, LL2 is always started on engine 0 for a storage PF */ + if (b_is_storage_eng1) { + rc2 = __qed_ll2_stop(QED_LEADING_HWFN(cdev)); + if (rc2) + DP_NOTICE(QED_LEADING_HWFN(cdev), + "Failed to stop LL2 on engine 0\n"); } - switch (QED_LEADING_HWFN(cdev)->hw_info.personality) { + rc = __qed_ll2_stop(p_hwfn); + if (rc) + DP_NOTICE(p_hwfn, "Failed to stop LL2\n"); + + qed_ll2_kill_buffers(cdev); + + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + + return rc | rc2; +} + +static int __qed_ll2_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_params *params) +{ + struct qed_ll2_buffer *buffer, *tmp_buffer; + struct qed_dev *cdev = p_hwfn->cdev; + enum qed_ll2_conn_type conn_type; + struct qed_ll2_acquire_data data; + int rc, rx_cnt; + + switch (p_hwfn->hw_info.personality) { case QED_PCI_FCOE: conn_type = QED_LL2_TYPE_FCOE; break; @@ -2321,33 +2356,34 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) conn_type = QED_LL2_TYPE_ROCE; break; default: + conn_type = QED_LL2_TYPE_TEST; } - qed_ll2_set_conn_data(cdev, &data, params, conn_type, + qed_ll2_set_conn_data(p_hwfn, &data, params, conn_type, &cdev->ll2->handle, false); - rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), 
&data); + rc = qed_ll2_acquire_connection(p_hwfn, &data); if (rc) { - DP_INFO(cdev, "Failed to acquire LL2 connection\n"); - goto fail; + DP_INFO(p_hwfn, "Failed to acquire LL2 connection\n"); + return rc; } - rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), - cdev->ll2->handle); + rc = qed_ll2_establish_connection(p_hwfn, cdev->ll2->handle); if (rc) { - DP_INFO(cdev, "Failed to establish LL2 connection\n"); - goto release_fail; + DP_INFO(p_hwfn, "Failed to establish LL2 connection\n"); + goto release_conn; } /* Post all Rx buffers to FW */ spin_lock_bh(&cdev->ll2->lock); + rx_cnt = cdev->ll2->rx_cnt; list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) { - rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + rc = qed_ll2_post_rx_buffer(p_hwfn, cdev->ll2->handle, buffer->phys_addr, 0, buffer, 1); if (rc) { - DP_INFO(cdev, + DP_INFO(p_hwfn, "Failed to post an Rx buffer; Deleting it\n"); dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, cdev->ll2->rx_size, DMA_FROM_DEVICE); @@ -2355,100 +2391,127 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) list_del(&buffer->list); kfree(buffer); } else { - cdev->ll2->rx_cnt++; + rx_cnt++; } } spin_unlock_bh(&cdev->ll2->lock); - if (!cdev->ll2->rx_cnt) { - DP_INFO(cdev, "Failed passing even a single Rx buffer\n"); - goto release_terminate; + if (rx_cnt == cdev->ll2->rx_cnt) { + DP_NOTICE(p_hwfn, "Failed passing even a single Rx buffer\n"); + goto terminate_conn; } + cdev->ll2->rx_cnt = rx_cnt; + + return 0; + +terminate_conn: + qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle); +release_conn: + qed_ll2_release_connection(p_hwfn, cdev->ll2->handle); + return rc; +} + +static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) +{ + bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); + struct qed_ll2_buffer *buffer; + int rx_num_desc, i, rc; if (!is_valid_ether_addr(params->ll2_mac_address)) { - DP_INFO(cdev, "Invalid Ethernet address\n"); - goto release_terminate; + DP_NOTICE(cdev, "Invalid Ethernet address\n"); + return -EINVAL; } - if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI) { - DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n"); - rc = qed_ll2_start_ooo(cdev, params); + WARN_ON(!cdev->ll2->cbs); + + /* Initialize LL2 locks & lists */ + INIT_LIST_HEAD(&cdev->ll2->list); + spin_lock_init(&cdev->ll2->lock); + + cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + L1_CACHE_BYTES + params->mtu; + + /* Allocate memory for LL2. + * In CMT mode, in case of a storage PF which is affintized to engine 1, + * LL2 is started also on engine 0 and thus we need twofold buffers. + */ + rx_num_desc = QED_LL2_RX_SIZE * (b_is_storage_eng1 ? 
2 : 1); + DP_INFO(cdev, "Allocating %d LL2 buffers of size %08x bytes\n", + rx_num_desc, cdev->ll2->rx_size); + for (i = 0; i < rx_num_desc; i++) { + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + DP_INFO(cdev, "Failed to allocate LL2 buffers\n"); + rc = -ENOMEM; + goto err0; + } + + rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data, + &buffer->phys_addr); if (rc) { - DP_INFO(cdev, - "Failed to initialize the OOO LL2 queue\n"); - goto release_terminate; + kfree(buffer); + goto err0; } - } - p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); - if (!p_ptt) { - DP_INFO(cdev, "Failed to acquire PTT\n"); - goto release_terminate; + list_add_tail(&buffer->list, &cdev->ll2->list); } - rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, - params->ll2_mac_address); - qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + rc = __qed_ll2_start(p_hwfn, params); if (rc) { - DP_ERR(cdev, "Failed to allocate LLH filter\n"); - goto release_terminate_all; + DP_NOTICE(cdev, "Failed to start LL2\n"); + goto err0; } - ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); - return 0; - -release_terminate_all: - -release_terminate: - qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); -release_fail: - qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); -fail: - qed_ll2_kill_buffers(cdev); - cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; - return -EINVAL; -} - -static int qed_ll2_stop(struct qed_dev *cdev) -{ - struct qed_ptt *p_ptt; - int rc; - - if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE) - return 0; + /* In CMT mode, always need to start LL2 on engine 0 for a storage PF, + * since broadcast/mutlicast packets are routed to engine 0. + */ + if (b_is_storage_eng1) { + rc = __qed_ll2_start(QED_LEADING_HWFN(cdev), params); + if (rc) { + DP_NOTICE(QED_LEADING_HWFN(cdev), + "Failed to start LL2 on engine 0\n"); + goto err1; + } + } - p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); - if (!p_ptt) { - DP_INFO(cdev, "Failed to acquire PTT\n"); - goto fail; + if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) { + DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n"); + rc = qed_ll2_start_ooo(p_hwfn, params); + if (rc) { + DP_NOTICE(cdev, "Failed to start OOO LL2\n"); + goto err2; + } } - qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, - cdev->ll2_mac_address); - qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); - eth_zero_addr(cdev->ll2_mac_address); + rc = qed_llh_add_mac_filter(cdev, 0, params->ll2_mac_address); + if (rc) { + DP_NOTICE(cdev, "Failed to add an LLH filter\n"); + goto err3; + } - if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI) - qed_ll2_stop_ooo(cdev); + ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); - rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), - cdev->ll2->handle); - if (rc) - DP_INFO(cdev, "Failed to terminate LL2 connection\n"); + return 0; +err3: + if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) + qed_ll2_stop_ooo(p_hwfn); +err2: + if (b_is_storage_eng1) + __qed_ll2_stop(QED_LEADING_HWFN(cdev)); +err1: + __qed_ll2_stop(p_hwfn); +err0: qed_ll2_kill_buffers(cdev); - - qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; - return rc; -fail: - return -EINVAL; } static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, unsigned long xmit_flags) { + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); struct qed_ll2_tx_pkt_info pkt; const skb_frag_t *frag; u8 flags = 0, nr_frags; @@ -2506,7 +2569,7 @@ static int 
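/*
 * The err0..err3 labels above follow the kernel's one-label-per-
 * resource unwind idiom: each failure jumps to the label that releases
 * everything acquired so far, in reverse order. A condensed,
 * self-contained model of the shape (the step names are illustrative,
 * not the driver's):
 */
#include <stdio.h>

static int step(const char *name, int fail)
{
        printf("%s\n", name);
        return fail ? -1 : 0;
}

static int start(int fail_at)
{
        int rc;

        rc = step("alloc buffers", fail_at == 0);
        if (rc)
                goto err0;
        rc = step("start engine", fail_at == 1);
        if (rc)
                goto err1;
        rc = step("add MAC filter", fail_at == 2);
        if (rc)
                goto err2;
        return 0;

err2:
        printf("stop engine\n");
err1:
        printf("free buffers\n");
err0:
        return rc;
}

int main(void)
{
        return start(2) ? 1 : 0;
}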
qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, * routine may run and free the SKB, so no dereferencing the SKB * beyond this point unless skb has any fragments. */ - rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, + rc = qed_ll2_prepare_tx_packet(p_hwfn, cdev->ll2->handle, &pkt, 1); if (rc) goto err; @@ -2524,13 +2587,13 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, goto err; } - rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, cdev->ll2->handle, mapping, skb_frag_size(frag)); /* if failed not much to do here, partial packet has been posted - * we can't free memory, will need to wait for completion. + * we can't free memory, will need to wait for completion */ if (rc) goto err2; @@ -2540,18 +2603,37 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, err: dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE); - err2: return rc; } static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) { + bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); + int rc; + if (!cdev->ll2) return -EINVAL; - return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), - cdev->ll2->handle, stats); + rc = qed_ll2_get_stats(p_hwfn, cdev->ll2->handle, stats); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to get LL2 stats\n"); + return rc; + } + + /* In CMT mode, LL2 is always started on engine 0 for a storage PF */ + if (b_is_storage_eng1) { + rc = __qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, stats); + if (rc) { + DP_NOTICE(QED_LEADING_HWFN(cdev), + "Failed to get LL2 stats on engine 0\n"); + return rc; + } + } + + return 0; } const struct qed_ll2_ops qed_ll2_ops_pass = { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6de23b56b294..829dd60ab937 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -48,6 +48,7 @@ #include <linux/crc32.h> #include <linux/qed/qed_if.h> #include <linux/qed/qed_ll2_if.h> +#include <net/devlink.h> #include "qed.h" #include "qed_sriov.h" @@ -342,6 +343,107 @@ static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state) return 0; } +struct qed_devlink { + struct qed_dev *cdev; +}; + +enum qed_devlink_param_id { + QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + QED_DEVLINK_PARAM_ID_IWARP_CMT, +}; + +static int qed_dl_param_get(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct qed_devlink *qed_dl; + struct qed_dev *cdev; + + qed_dl = devlink_priv(dl); + cdev = qed_dl->cdev; + ctx->val.vbool = cdev->iwarp_cmt; + + return 0; +} + +static int qed_dl_param_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct qed_devlink *qed_dl; + struct qed_dev *cdev; + + qed_dl = devlink_priv(dl); + cdev = qed_dl->cdev; + cdev->iwarp_cmt = ctx->val.vbool; + + return 0; +} + +static const struct devlink_param qed_devlink_params[] = { + DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT, + "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + qed_dl_param_get, qed_dl_param_set, NULL), +}; + +static const struct devlink_ops qed_dl_ops; + +static int qed_devlink_register(struct qed_dev *cdev) +{ + union devlink_param_value value; + struct qed_devlink *qed_dl; + struct devlink *dl; + int rc; + + dl = devlink_alloc(&qed_dl_ops, sizeof(*qed_dl)); + if (!dl) + return 
-ENOMEM; + + qed_dl = devlink_priv(dl); + + cdev->dl = dl; + qed_dl->cdev = cdev; + + rc = devlink_register(dl, &cdev->pdev->dev); + if (rc) + goto err_free; + + rc = devlink_params_register(dl, qed_devlink_params, + ARRAY_SIZE(qed_devlink_params)); + if (rc) + goto err_unregister; + + value.vbool = false; + devlink_param_driverinit_value_set(dl, + QED_DEVLINK_PARAM_ID_IWARP_CMT, + value); + + devlink_params_publish(dl); + cdev->iwarp_cmt = false; + + return 0; + +err_unregister: + devlink_unregister(dl); + +err_free: + cdev->dl = NULL; + devlink_free(dl); + + return rc; +} + +static void qed_devlink_unregister(struct qed_dev *cdev) +{ + if (!cdev->dl) + return; + + devlink_params_unregister(cdev->dl, qed_devlink_params, + ARRAY_SIZE(qed_devlink_params)); + + devlink_unregister(cdev->dl); + devlink_free(cdev->dl); +} + /* probing */ static struct qed_dev *qed_probe(struct pci_dev *pdev, struct qed_probe_params *params) @@ -370,6 +472,12 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev, } DP_INFO(cdev, "PCI init completed successfully\n"); + rc = qed_devlink_register(cdev); + if (rc) { + DP_INFO(cdev, "Failed to register devlink.\n"); + goto err2; + } + rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT); if (rc) { DP_ERR(cdev, "hw prepare failed\n"); @@ -399,6 +507,8 @@ static void qed_remove(struct qed_dev *cdev) qed_set_power_state(cdev, PCI_D3hot); + qed_devlink_unregister(cdev); + qed_free_cdev(cdev); } @@ -1301,26 +1411,21 @@ static u32 qed_sb_init(struct qed_dev *cdev, { struct qed_hwfn *p_hwfn; struct qed_ptt *p_ptt; - int hwfn_index; u16 rel_sb_id; - u8 n_hwfns; u32 rc; - /* RoCE uses single engine and CMT uses two engines. When using both - * we force only a single engine. Storage uses only engine 0 too. - */ - if (type == QED_SB_TYPE_L2_QUEUE) - n_hwfns = cdev->num_hwfns; - else - n_hwfns = 1; - - hwfn_index = sb_id % n_hwfns; - p_hwfn = &cdev->hwfns[hwfn_index]; - rel_sb_id = sb_id / n_hwfns; + /* RoCE/Storage use a single engine in CMT mode while L2 uses both */ + if (type == QED_SB_TYPE_L2_QUEUE) { + p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns]; + rel_sb_id = sb_id / cdev->num_hwfns; + } else { + p_hwfn = QED_AFFIN_HWFN(cdev); + rel_sb_id = sb_id; + } DP_VERBOSE(cdev, NETIF_MSG_INTR, "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", - hwfn_index, rel_sb_id, sb_id); + IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id); if (IS_PF(p_hwfn->cdev)) { p_ptt = qed_ptt_acquire(p_hwfn); @@ -1339,20 +1444,26 @@ static u32 qed_sb_init(struct qed_dev *cdev, } static u32 qed_sb_release(struct qed_dev *cdev, - struct qed_sb_info *sb_info, u16 sb_id) + struct qed_sb_info *sb_info, + u16 sb_id, + enum qed_sb_type type) { struct qed_hwfn *p_hwfn; - int hwfn_index; u16 rel_sb_id; u32 rc; - hwfn_index = sb_id % cdev->num_hwfns; - p_hwfn = &cdev->hwfns[hwfn_index]; - rel_sb_id = sb_id / cdev->num_hwfns; + /* RoCE/Storage use a single engine in CMT mode while L2 uses both */ + if (type == QED_SB_TYPE_L2_QUEUE) { + p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns]; + rel_sb_id = sb_id / cdev->num_hwfns; + } else { + p_hwfn = QED_AFFIN_HWFN(cdev); + rel_sb_id = sb_id; + } DP_VERBOSE(cdev, NETIF_MSG_INTR, "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", - hwfn_index, rel_sb_id, sb_id); + IS_LEAD_HWFN(p_hwfn) ? 
0 : 1, rel_sb_id, sb_id); rc = qed_int_sb_release(p_hwfn, sb_info, rel_sb_id); @@ -2372,6 +2483,11 @@ static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf, return rc; } +static u8 qed_get_affin_hwfn_idx(struct qed_dev *cdev) +{ + return QED_AFFIN_HWFN_IDX(cdev); +} + static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_memory = &qed_selftest_memory, .selftest_interrupt = &qed_selftest_interrupt, @@ -2419,6 +2535,7 @@ const struct qed_common_ops qed_common_ops_pass = { .db_recovery_add = &qed_db_recovery_add, .db_recovery_del = &qed_db_recovery_del, .read_module_eeprom = &qed_read_module_eeprom, + .get_affin_hwfn_idx = &qed_get_affin_hwfn_idx, }; void qed_get_protocol_stats(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index cc27fd60d689..758702c1ce9c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3685,3 +3685,68 @@ int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT, features, &mcp_resp, &mcp_param); } + +int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_mcp_mb_params mb_params = {0}; + struct qed_dev *cdev = p_hwfn->cdev; + u8 fir_valid, l2_valid; + int rc; + + mb_params.cmd = DRV_MSG_CODE_GET_ENGINE_CONFIG; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) { + DP_INFO(p_hwfn, + "The get_engine_config command is unsupported by the MFW\n"); + return -EOPNOTSUPP; + } + + fir_valid = QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID); + if (fir_valid) + cdev->fir_affin = + QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE); + + l2_valid = QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID); + if (l2_valid) + cdev->l2_affin_hint = + QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE); + + DP_INFO(p_hwfn, + "Engine affinity config: FIR={valid %hhd, value %hhd}, L2_hint={valid %hhd, value %hhd}\n", + fir_valid, cdev->fir_affin, l2_valid, cdev->l2_affin_hint); + + return 0; +} + +int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_mcp_mb_params mb_params = {0}; + struct qed_dev *cdev = p_hwfn->cdev; + int rc; + + mb_params.cmd = DRV_MSG_CODE_GET_PPFID_BITMAP; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) { + DP_INFO(p_hwfn, + "The get_ppfid_bitmap command is unsupported by the MFW\n"); + return -EOPNOTSUPP; + } + + cdev->ppfid_bitmap = QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_PPFID_BITMAP); + + DP_VERBOSE(p_hwfn, QED_MSG_SP, "PPFID bitmap 0x%hhx\n", + cdev->ppfid_bitmap); + + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 261c1a392e2c..e4f8fe4bd062 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -1186,4 +1186,20 @@ void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); */ int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn); +/** + * @brief Get the engine affinity configuration. 
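/*
 * QED_MFW_GET_FIELD() in qed_mcp_get_engine_config() above extracts a
 * named field from the mailbox parameter via that field's _MASK/_SHIFT
 * pair, with separate "valid" and "value" bits per setting. A
 * userspace model of the accessor; the macro name, masks and shifts
 * below are made up for illustration:
 */
#include <stdint.h>
#include <stdio.h>

#define ENG_CFG_FIR_AFFIN_VALID_MASK  0x00000001
#define ENG_CFG_FIR_AFFIN_VALID_SHIFT 0
#define ENG_CFG_FIR_AFFIN_VALUE_MASK  0x00000002
#define ENG_CFG_FIR_AFFIN_VALUE_SHIFT 1

#define GET_MFW_FIELD(val, name) \
        (((val) & name##_MASK) >> name##_SHIFT)

int main(void)
{
        uint32_t mcp_param = 0x3;       /* valid = 1, value = 1 */

        printf("valid=%u value=%u\n",
               (unsigned)GET_MFW_FIELD(mcp_param, ENG_CFG_FIR_AFFIN_VALID),
               (unsigned)GET_MFW_FIELD(mcp_param, ENG_CFG_FIR_AFFIN_VALUE));
        return 0;
}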
+ * + * @param p_hwfn + * @param p_ptt + */ +int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +/** + * @brief Get the PPFID bitmap. + * + * @param p_hwfn + * @param p_ptt + */ +int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c index 1302b308bd87..0dacf2c18c09 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c @@ -44,6 +44,8 @@ /* Add/subtract the Adjustment_Value when making a Drift adjustment */ #define QED_DRIFT_CNTR_DIRECTION_SHIFT 31 #define QED_TIMESTAMP_MASK BIT(16) +/* Param mask for Hardware to detect/timestamp the unicast PTP packets */ +#define QED_PTP_UCAST_PARAM_MASK 0xF static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn) { @@ -157,7 +159,8 @@ static int qed_ptp_hw_read_tx_ts(struct qed_dev *cdev, u64 *timestamp) *timestamp = 0; val = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID); if (!(val & QED_TIMESTAMP_MASK)) { - DP_INFO(p_hwfn, "Invalid Tx timestamp, buf_seqid = %d\n", val); + DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, + "Invalid Tx timestamp, buf_seqid = %08x\n", val); return -EINVAL; } @@ -242,7 +245,8 @@ static int qed_ptp_hw_cfg_filters(struct qed_dev *cdev, return -EINVAL; } - qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, + QED_PTP_UCAST_PARAM_MASK); qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, rule_mask); qed_wr(p_hwfn, p_ptt, NIG_REG_RX_PTP_EN, enable_cfg); @@ -252,7 +256,8 @@ static int qed_ptp_hw_cfg_filters(struct qed_dev *cdev, qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, 0x3FFF); } else { qed_wr(p_hwfn, p_ptt, NIG_REG_TX_PTP_EN, enable_cfg); - qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, + QED_PTP_UCAST_PARAM_MASK); qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, rule_mask); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 7873d6dfd91f..f900fde448db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -700,7 +700,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn, return rc; if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { - rc = qed_iwarp_setup(p_hwfn, p_ptt, params); + rc = qed_iwarp_setup(p_hwfn, params); if (rc) return rc; } else { @@ -742,7 +742,7 @@ static int qed_rdma_stop(void *rdma_cxt) (ll2_ethertype_en & 0xFFFE)); if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { - rc = qed_iwarp_stop(p_hwfn, p_ptt); + rc = qed_iwarp_stop(p_hwfn); if (rc) { qed_ptt_release(p_hwfn, p_ptt); return rc; @@ -803,7 +803,7 @@ static int qed_rdma_add_user(void *rdma_cxt, dpi_start_offset + ((out_params->dpi) * p_hwfn->dpi_size)); - out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr + + out_params->dpi_phys_addr = p_hwfn->db_phys_addr + dpi_start_offset + ((out_params->dpi) * p_hwfn->dpi_size); @@ -818,14 +818,17 @@ static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port; + struct qed_mcp_link_state *p_link_output; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n"); - /* Link may have changed */ - p_port->port_state = p_hwfn->mcp_info->link_output.link_up ? 
- QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + /* The link state is saved only for the leading hwfn */ + p_link_output = &QED_LEADING_HWFN(p_hwfn->cdev)->mcp_info->link_output; - p_port->link_speed = p_hwfn->mcp_info->link_output.speed; + p_port->port_state = p_link_output->link_up ? QED_RDMA_PORT_UP + : QED_RDMA_PORT_DOWN; + + p_port->link_speed = p_link_output->speed; p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE; @@ -870,7 +873,7 @@ static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) static int qed_fill_rdma_dev_info(struct qed_dev *cdev, struct qed_dev_rdma_info *info) { - struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); memset(info, 0, sizeof(*info)); @@ -889,9 +892,9 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev) int feat_num; if (cdev->num_hwfns > 1) - feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE); + feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE); else - feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) * + feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE) * cdev->num_hwfns; return feat_num; @@ -899,7 +902,7 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev) static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev) { - int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ); + int n_cnq = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_RDMA_CNQ); int n_msix = cdev->int_params.rdma_msix_cnt; return min_t(int, n_cnq, n_msix); @@ -1653,7 +1656,7 @@ static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { - return QED_LEADING_HWFN(cdev); + return QED_AFFIN_HWFN(cdev); } static int qed_rdma_modify_srq(void *rdma_cxt, @@ -1881,7 +1884,7 @@ err: static int qed_rdma_init(struct qed_dev *cdev, struct qed_rdma_start_in_params *params) { - return qed_rdma_start(QED_LEADING_HWFN(cdev), params); + return qed_rdma_start(QED_AFFIN_HWFN(cdev), params); } static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) @@ -1899,23 +1902,12 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, u8 *old_mac_address, u8 *new_mac_address) { - struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); - struct qed_ptt *p_ptt; int rc = 0; - p_ptt = qed_ptt_acquire(p_hwfn); - if (!p_ptt) { - DP_ERR(cdev, - "qed roce ll2 mac filter set: failed to acquire PTT\n"); - return -EINVAL; - } - if (old_mac_address) - qed_llh_remove_mac_filter(p_hwfn, p_ptt, old_mac_address); + qed_llh_remove_mac_filter(cdev, 0, old_mac_address); if (new_mac_address) - rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, new_mac_address); - - qed_ptt_release(p_hwfn, p_ptt); + rc = qed_llh_add_mac_filter(cdev, 0, new_mac_address); if (rc) DP_ERR(cdev, @@ -1924,6 +1916,36 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, return rc; } +static int qed_iwarp_set_engine_affin(struct qed_dev *cdev, bool b_reset) +{ + enum qed_eng eng; + u8 ppfid = 0; + int rc; + + /* Make sure iwarp cmt mode is enabled before setting affinity */ + if (!cdev->iwarp_cmt) + return -EINVAL; + + if (b_reset) + eng = QED_BOTH_ENG; + else + eng = cdev->l2_affin_hint ? 
QED_ENG1 : QED_ENG0; + + rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng); + if (rc) { + DP_NOTICE(cdev, + "Failed to set the engine affinity of ppfid %d\n", + ppfid); + return rc; + } + + DP_VERBOSE(cdev, (QED_MSG_RDMA | QED_MSG_SP), + "LLH: Set the engine affinity of non-RoCE packets as %d\n", + eng); + + return 0; +} + static const struct qed_rdma_ops qed_rdma_ops_pass = { .common = &qed_common_ops_pass, .fill_dev_info = &qed_fill_rdma_dev_info, @@ -1963,6 +1985,7 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet, .ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter, .ll2_get_stats = &qed_ll2_get_stats, + .iwarp_set_engine_affin = &qed_iwarp_set_engine_affin, .iwarp_connect = &qed_iwarp_connect, .iwarp_create_listen = &qed_iwarp_create_listen, .iwarp_destroy_listen = &qed_iwarp_destroy_listen, diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 5ce825ca5f24..60f850c3bdd6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -254,6 +254,10 @@ 0x500840UL #define NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR \ 0x50196cUL +#define NIG_REG_LLH_PPFID2PFID_TBL_0 \ + 0x501970UL +#define NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL \ + 0x50 #define NIG_REG_LLH_CLS_TYPE_DUALMODE \ 0x501964UL #define NIG_REG_LLH_FUNC_TAG_EN 0x5019b0UL @@ -1626,6 +1630,8 @@ #define PHY_PCIE_REG_PHY1_K2_E5 \ 0x624000UL #define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL +#define NIG_REG_PPF_TO_ENGINE_SEL 0x508900UL +#define NIG_REG_PPF_TO_ENGINE_SEL_SIZE 8 #define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL #define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL #define DORQ_REG_PF_DPM_ENABLE 0x100510UL diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 5a495fda9e9d..7e0b795230b2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -588,7 +588,7 @@ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 2f318aaf2b05..78f77b712b10 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -917,10 +917,11 @@ static u8 qed_iov_alloc_vf_igu_sbs(struct qed_hwfn *p_hwfn, /* Configure igu sb in CAU which were marked valid */ qed_init_cau_sb_entry(p_hwfn, &sb_entry, p_hwfn->rel_pf_id, vf->abs_vf_id, 1); + qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry, CAU_REG_SB_VAR_MEMORY + - p_block->igu_sb_id * sizeof(u64), 2, 0); + p_block->igu_sb_id * sizeof(u64), 2, NULL); } vf->num_sbs = (u8) num_rx_queues; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 92fe226980fd..0e931c04fecf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -92,6 +92,7 @@ struct qede_stats_common { u64 non_coalesced_pkts; u64 coalesced_bytes; u64 link_change_count; + u64 ptp_skip_txts; /* port */ u64 rx_64_byte_packets; @@ -189,6 +190,7 @@ struct qede_dev { const struct qed_eth_ops *ops; struct qede_ptp *ptp; + u64 ptp_skip_txts; struct qed_dev_eth_info dev_info; #define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues) @@ -549,7 
+551,7 @@ int qede_txq_has_work(struct qede_tx_queue *txq); void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count); void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq); int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); #define RX_RING_SIZE_POW 13 #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 8911a97ab0ca..e85f9fef930c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -174,6 +174,7 @@ static const struct { QEDE_STAT(coalesced_bytes), QEDE_STAT(link_change_count), + QEDE_STAT(ptp_skip_txts), }; #define QEDE_NUM_STATS ARRAY_SIZE(qede_stats_arr) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index add922b93d2c..9a6a9a008714 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1943,7 +1943,7 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, } int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct qede_arfs_fltr_node *n; int min_hlen, rc = -EINVAL; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 02a97c659e29..8d1c208f778f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -390,6 +390,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) p_common->brb_discards = stats.common.brb_discards; p_common->tx_mac_ctrl_frames = stats.common.tx_mac_ctrl_frames; p_common->link_change_count = stats.common.link_change_count; + p_common->ptp_skip_txts = edev->ptp_skip_txts; if (QEDE_IS_BB(edev)) { struct qede_stats_bb *p_bb = &edev->stats.bb; @@ -547,13 +548,13 @@ static int qede_setup_tc(struct net_device *ndev, u8 num_tc) } static int -qede_set_flower(struct qede_dev *edev, struct tc_cls_flower_offload *f, +qede_set_flower(struct qede_dev *edev, struct flow_cls_offload *f, __be16 proto) { switch (f->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return qede_add_tc_flower_fltr(edev, proto, f); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return qede_delete_flow_filter(edev, f->cookie); default: return -EOPNOTSUPP; @@ -563,7 +564,7 @@ qede_set_flower(struct qede_dev *edev, struct tc_cls_flower_offload *f, static int qede_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - struct tc_cls_flower_offload *f; + struct flow_cls_offload *f; struct qede_dev *edev = cb_priv; if (!tc_cls_can_offload_and_chain0(edev->ndev, type_data)) @@ -578,24 +579,7 @@ static int qede_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int qede_setup_tc_block(struct qede_dev *edev, - struct tc_block_offload *f) -{ - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - qede_setup_tc_block_cb, - edev, edev, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, qede_setup_tc_block_cb, edev); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(qede_block_cb_list); static int qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type, @@ -606,7 +590,10 @@ 
qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: - return qede_setup_tc_block(edev, type_data); + return flow_block_cb_setup_simple(type_data, + &qede_block_cb_list, + qede_setup_tc_block_cb, + edev, edev, true); case TC_SETUP_QDISC_MQPRIO: mqprio = type_data; @@ -959,13 +946,13 @@ void __qede_unlock(struct qede_dev *edev) /* This version of the lock should be used when acquiring the RTNL lock is also * needed in addition to the internal qede lock. */ -void qede_lock(struct qede_dev *edev) +static void qede_lock(struct qede_dev *edev) { rtnl_lock(); __qede_lock(edev); } -void qede_unlock(struct qede_dev *edev) +static void qede_unlock(struct qede_dev *edev) { __qede_unlock(edev); rtnl_unlock(); @@ -1306,7 +1293,8 @@ static void qede_free_mem_sb(struct qede_dev *edev, struct qed_sb_info *sb_info, u16 sb_id) { if (sb_info->sb_virt) { - edev->ops->common->sb_release(edev->cdev, sb_info, sb_id); + edev->ops->common->sb_release(edev->cdev, sb_info, sb_id, + QED_SB_TYPE_L2_QUEUE); dma_free_coherent(&edev->pdev->dev, sizeof(*sb_info->sb_virt), (void *)sb_info->sb_virt, sb_info->sb_phys); memset(sb_info, 0, sizeof(*sb_info)); @@ -2231,6 +2219,8 @@ out: if (mode != QEDE_UNLOAD_RECOVERY) DP_NOTICE(edev, "Link is down\n"); + edev->ptp_skip_txts = 0; + DP_INFO(edev, "Ending qede unload\n"); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index bddb2b5982dc..f815435cf106 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -30,6 +30,7 @@ * SOFTWARE. */ #include "qede_ptp.h" +#define QEDE_PTP_TX_TIMEOUT (2 * HZ) struct qede_ptp { const struct qed_eth_ptp_ops *ops; @@ -38,6 +39,7 @@ struct qede_ptp { struct timecounter tc; struct ptp_clock *clock; struct work_struct work; + unsigned long ptp_tx_start; struct qede_dev *edev; struct sk_buff *tx_skb; @@ -160,18 +162,30 @@ static void qede_ptp_task(struct work_struct *work) struct qede_dev *edev; struct qede_ptp *ptp; u64 timestamp, ns; + bool timedout; int rc; ptp = container_of(work, struct qede_ptp, work); edev = ptp->edev; + timedout = time_is_before_jiffies(ptp->ptp_tx_start + + QEDE_PTP_TX_TIMEOUT); /* Read Tx timestamp registers */ spin_lock_bh(&ptp->lock); rc = ptp->ops->read_tx_ts(edev->cdev, ×tamp); spin_unlock_bh(&ptp->lock); if (rc) { - /* Reschedule to keep checking for a valid timestamp value */ - schedule_work(&ptp->work); + if (unlikely(timedout)) { + DP_INFO(edev, "Tx timestamp is not recorded\n"); + dev_kfree_skb_any(ptp->tx_skb); + ptp->tx_skb = NULL; + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, + &edev->flags); + edev->ptp_skip_txts++; + } else { + /* Reschedule to keep checking for a valid TS value */ + schedule_work(&ptp->work); + } return; } @@ -514,19 +528,28 @@ void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb) if (!ptp) return; - if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags)) + if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, + &edev->flags)) { + DP_ERR(edev, "Timestamping in progress\n"); + edev->ptp_skip_txts++; return; + } if (unlikely(!test_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags))) { - DP_NOTICE(edev, - "Tx timestamping was not enabled, this packet will not be timestamped\n"); + DP_ERR(edev, + "Tx timestamping was not enabled, this packet will not be timestamped\n"); + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags); + edev->ptp_skip_txts++; } else if (unlikely(ptp->tx_skb)) { - 
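/*
 * The PTP change above records jiffies when a Tx timestamp request is
 * armed (ptp_tx_start) and declares the timestamp lost once 2*HZ have
 * elapsed, via time_is_before_jiffies(). A userspace model of the
 * underlying wrap-safe comparison; the signed subtraction is what
 * keeps the check correct when the tick counter wraps:
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long jiffies_t;

/* true if a is after b, correct across counter wrap */
static bool time_after(jiffies_t a, jiffies_t b)
{
        return (long)(b - a) < 0;
}

int main(void)
{
        const jiffies_t HZ = 250;               /* hypothetical tick rate */
        jiffies_t jiffies = (jiffies_t)-100;    /* near wrap on purpose */
        jiffies_t tx_start = jiffies;

        jiffies += 3 * HZ;                      /* time passes; counter wraps */
        printf("timed out: %d\n", time_after(jiffies, tx_start + 2 * HZ));
        return 0;
}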
DP_NOTICE(edev, - "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + DP_ERR(edev, + "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags); + edev->ptp_skip_txts++; } else { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* schedule check for Tx timestamp */ ptp->tx_skb = skb_get(skb); + ptp->ptp_tx_start = jiffies; schedule_work(&ptp->work); } } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 7a873002e626..c07438db30ba 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -4119,13 +4119,14 @@ static void qlcnic_config_indev_addr(struct qlcnic_adapter *adapter, struct net_device *dev, unsigned long event) { + const struct in_ifaddr *ifa; struct in_device *indev; indev = in_dev_get(dev); if (!indev) return; - for_ifa(indev) { + in_dev_for_each_ifa_rtnl(ifa, indev) { switch (event) { case NETDEV_UP: qlcnic_config_ipaddr(adapter, @@ -4138,7 +4139,7 @@ qlcnic_config_indev_addr(struct qlcnic_adapter *adapter, default: break; } - } endfor_ifa(indev); + } in_dev_put(indev); } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index af3b037fa442..5632da05145a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1066,7 +1066,7 @@ static int qlcnic_sriov_pf_cfg_ip_cmd(struct qlcnic_bc_trans *trans, { struct qlcnic_vf_info *vf = trans->vf; struct qlcnic_adapter *adapter = vf->adapter; - int err = -EIO; + int err; cmd->req.arg[1] |= vf->vp->handle << 16; cmd->req.arg[1] |= BIT_31; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h index 4bf20d0651c4..576501db2a0b 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h @@ -4,6 +4,7 @@ #ifndef _RMNET_MAP_H_ #define _RMNET_MAP_H_ +#include <linux/if_rmnet.h> struct rmnet_map_control_command { u8 command_name; @@ -31,30 +32,6 @@ enum rmnet_map_commands { RMNET_MAP_COMMAND_ENUM_LENGTH }; -struct rmnet_map_header { - u8 pad_len:6; - u8 reserved_bit:1; - u8 cd_bit:1; - u8 mux_id; - __be16 pkt_len; -} __aligned(1); - -struct rmnet_map_dl_csum_trailer { - u8 reserved1; - u8 valid:1; - u8 reserved2:7; - u16 csum_start_offset; - u16 csum_length; - __be16 csum_value; -} __aligned(1); - -struct rmnet_map_ul_csum_header { - __be16 csum_start_offset; - u16 csum_insert_offset:14; - u16 udp_ip4_ind:1; - u16 csum_enabled:1; -} __aligned(1); - #define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \ (Y)->data)->mux_id) #define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \ diff --git a/drivers/net/ethernet/realtek/Makefile b/drivers/net/ethernet/realtek/Makefile index 33be8c5ad0c9..d5304bad2372 100644 --- a/drivers/net/ethernet/realtek/Makefile +++ b/drivers/net/ethernet/realtek/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_8139CP) += 8139cp.o obj-$(CONFIG_8139TOO) += 8139too.o obj-$(CONFIG_ATP) += atp.o +r8169-objs += r8169_main.o r8169_firmware.o obj-$(CONFIG_R8169) += r8169.o diff --git a/drivers/net/ethernet/realtek/r8169_firmware.c b/drivers/net/ethernet/realtek/r8169_firmware.c new file mode 100644 index 000000000000..8f54a2c832eb --- /dev/null +++ 
b/drivers/net/ethernet/realtek/r8169_firmware.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* r8169_firmware.c: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file for support contact information. + */ + +#include <linux/delay.h> +#include <linux/firmware.h> + +#include "r8169_firmware.h" + +enum rtl_fw_opcode { + PHY_READ = 0x0, + PHY_DATA_OR = 0x1, + PHY_DATA_AND = 0x2, + PHY_BJMPN = 0x3, + PHY_MDIO_CHG = 0x4, + PHY_CLEAR_READCOUNT = 0x7, + PHY_WRITE = 0x8, + PHY_READCOUNT_EQ_SKIP = 0x9, + PHY_COMP_EQ_SKIPN = 0xa, + PHY_COMP_NEQ_SKIPN = 0xb, + PHY_WRITE_PREVIOUS = 0xc, + PHY_SKIPN = 0xd, + PHY_DELAY_MS = 0xe, +}; + +struct fw_info { + u32 magic; + char version[RTL_VER_SIZE]; + __le32 fw_start; + __le32 fw_len; + u8 chksum; +} __packed; + +#define FW_OPCODE_SIZE sizeof(typeof(*((struct rtl_fw_phy_action *)0)->code)) + +static bool rtl_fw_format_ok(struct rtl_fw *rtl_fw) +{ + const struct firmware *fw = rtl_fw->fw; + struct fw_info *fw_info = (struct fw_info *)fw->data; + struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; + + if (fw->size < FW_OPCODE_SIZE) + return false; + + if (!fw_info->magic) { + size_t i, size, start; + u8 checksum = 0; + + if (fw->size < sizeof(*fw_info)) + return false; + + for (i = 0; i < fw->size; i++) + checksum += fw->data[i]; + if (checksum != 0) + return false; + + start = le32_to_cpu(fw_info->fw_start); + if (start > fw->size) + return false; + + size = le32_to_cpu(fw_info->fw_len); + if (size > (fw->size - start) / FW_OPCODE_SIZE) + return false; + + strscpy(rtl_fw->version, fw_info->version, RTL_VER_SIZE); + + pa->code = (__le32 *)(fw->data + start); + pa->size = size; + } else { + if (fw->size % FW_OPCODE_SIZE) + return false; + + strscpy(rtl_fw->version, rtl_fw->fw_name, RTL_VER_SIZE); + + pa->code = (__le32 *)fw->data; + pa->size = fw->size / FW_OPCODE_SIZE; + } + + return true; +} + +static bool rtl_fw_data_ok(struct rtl_fw *rtl_fw) +{ + struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; + size_t index; + + for (index = 0; index < pa->size; index++) { + u32 action = le32_to_cpu(pa->code[index]); + u32 regno = (action & 0x0fff0000) >> 16; + + switch (action >> 28) { + case PHY_READ: + case PHY_DATA_OR: + case PHY_DATA_AND: + case PHY_MDIO_CHG: + case PHY_CLEAR_READCOUNT: + case PHY_WRITE: + case PHY_WRITE_PREVIOUS: + case PHY_DELAY_MS: + break; + + case PHY_BJMPN: + if (regno > index) + goto out; + break; + case PHY_READCOUNT_EQ_SKIP: + if (index + 2 >= pa->size) + goto out; + break; + case PHY_COMP_EQ_SKIPN: + case PHY_COMP_NEQ_SKIPN: + case PHY_SKIPN: + if (index + 1 + regno >= pa->size) + goto out; + break; + + default: + dev_err(rtl_fw->dev, "Invalid action 0x%08x\n", action); + return false; + } + } + + return true; +out: + dev_err(rtl_fw->dev, "Out of range of firmware\n"); + return false; +} + +void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) +{ + struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; + rtl_fw_write_t fw_write = rtl_fw->phy_write; + rtl_fw_read_t fw_read = rtl_fw->phy_read; + int predata = 0, count = 0; + size_t index; + + for (index = 0; index < pa->size; index++) { + u32 action = le32_to_cpu(pa->code[index]); + u32 data = action & 0x0000ffff; + u32 regno = (action & 0x0fff0000) >> 16; + enum rtl_fw_opcode opcode = action >> 28; + + if (!action) + break; + + switch (opcode) { 
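/*
 * rtl_fw_format_ok() above accepts a structured image only if the
 * byte-wise sum of the whole blob is zero (the header carries the
 * complementing checksum byte) and fw_start/fw_len stay inside the
 * file. A userspace sketch of the same validation on a made-up
 * 8-byte blob:
 */
#include <stdint.h>
#include <stdio.h>

static int blob_ok(const uint8_t *data, size_t size,
                   uint32_t start, uint32_t len, size_t opcode_size)
{
        uint8_t sum = 0;
        size_t i;

        for (i = 0; i < size; i++)
                sum += data[i];
        if (sum != 0)                   /* checksum byte must cancel the rest */
                return 0;
        if (start > size)               /* action code must begin in-file */
                return 0;
        return len <= (size - start) / opcode_size;
}

int main(void)
{
        uint8_t blob[8] = { 0x10, 0x20, 0x30, 0 };

        blob[3] = (uint8_t)-(0x10 + 0x20 + 0x30);       /* checksum byte */
        printf("ok=%d\n", blob_ok(blob, sizeof(blob), 4, 1, 4));
        return 0;
}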
+ case PHY_READ: + predata = fw_read(tp, regno); + count++; + break; + case PHY_DATA_OR: + predata |= data; + break; + case PHY_DATA_AND: + predata &= data; + break; + case PHY_BJMPN: + index -= (regno + 1); + break; + case PHY_MDIO_CHG: + if (data == 0) { + fw_write = rtl_fw->phy_write; + fw_read = rtl_fw->phy_read; + } else if (data == 1) { + fw_write = rtl_fw->mac_mcu_write; + fw_read = rtl_fw->mac_mcu_read; + } + + break; + case PHY_CLEAR_READCOUNT: + count = 0; + break; + case PHY_WRITE: + fw_write(tp, regno, data); + break; + case PHY_READCOUNT_EQ_SKIP: + if (count == data) + index++; + break; + case PHY_COMP_EQ_SKIPN: + if (predata == data) + index += regno; + break; + case PHY_COMP_NEQ_SKIPN: + if (predata != data) + index += regno; + break; + case PHY_WRITE_PREVIOUS: + fw_write(tp, regno, predata); + break; + case PHY_SKIPN: + index += regno; + break; + case PHY_DELAY_MS: + mdelay(data); + break; + } + } +} + +void rtl_fw_release_firmware(struct rtl_fw *rtl_fw) +{ + release_firmware(rtl_fw->fw); +} + +int rtl_fw_request_firmware(struct rtl_fw *rtl_fw) +{ + int rc; + + rc = request_firmware(&rtl_fw->fw, rtl_fw->fw_name, rtl_fw->dev); + if (rc < 0) + goto out; + + if (!rtl_fw_format_ok(rtl_fw) || !rtl_fw_data_ok(rtl_fw)) { + release_firmware(rtl_fw->fw); + rc = -EINVAL; + goto out; + } + + return 0; +out: + dev_err(rtl_fw->dev, "Unable to load firmware %s (%d)\n", + rtl_fw->fw_name, rc); + return rc; +} diff --git a/drivers/net/ethernet/realtek/r8169_firmware.h b/drivers/net/ethernet/realtek/r8169_firmware.h new file mode 100644 index 000000000000..7dc348ed8345 --- /dev/null +++ b/drivers/net/ethernet/realtek/r8169_firmware.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* r8169_firmware.h: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file for support contact information. 
+ */ + +#include <linux/device.h> +#include <linux/firmware.h> + +struct rtl8169_private; +typedef void (*rtl_fw_write_t)(struct rtl8169_private *tp, int reg, int val); +typedef int (*rtl_fw_read_t)(struct rtl8169_private *tp, int reg); + +#define RTL_VER_SIZE 32 + +struct rtl_fw { + rtl_fw_write_t phy_write; + rtl_fw_read_t phy_read; + rtl_fw_write_t mac_mcu_write; + rtl_fw_read_t mac_mcu_read; + const struct firmware *fw; + const char *fw_name; + struct device *dev; + + char version[RTL_VER_SIZE]; + + struct rtl_fw_phy_action { + __le32 *code; + size_t size; + } phy_action; +}; + +int rtl_fw_request_firmware(struct rtl_fw *rtl_fw); +void rtl_fw_release_firmware(struct rtl_fw *rtl_fw); +void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169_main.c index d06a61f00e78..efef5453b94f 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -27,12 +27,13 @@ #include <linux/interrupt.h> #include <linux/dma-mapping.h> #include <linux/pm_runtime.h> -#include <linux/firmware.h> #include <linux/prefetch.h> #include <linux/pci-aspm.h> #include <linux/ipv6.h> #include <net/ip6_checksum.h> +#include "r8169_firmware.h" + #define MODULENAME "r8169" #define FIRMWARE_8168D_1 "rtl_nic/rtl8168d-1.fw" @@ -72,6 +73,8 @@ static const int multicast_filter_limit = 32; #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) #define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) +#define RTL_CFG_NO_GBIT 1 + /* write/read MMIO register */ #define RTL_W8(tp, reg, val8) writeb((val8), tp->mmio_addr + (reg)) #define RTL_W16(tp, reg, val16) writew((val16), tp->mmio_addr + (reg)) @@ -81,7 +84,7 @@ static const int multicast_filter_limit = 32; #define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg)) enum mac_version { - RTL_GIGA_MAC_VER_01 = 0, + /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */ RTL_GIGA_MAC_VER_02, RTL_GIGA_MAC_VER_03, RTL_GIGA_MAC_VER_04, @@ -132,7 +135,7 @@ enum mac_version { RTL_GIGA_MAC_VER_49, RTL_GIGA_MAC_VER_50, RTL_GIGA_MAC_VER_51, - RTL_GIGA_MAC_NONE = 0xff, + RTL_GIGA_MAC_NONE }; #define JUMBO_1K ETH_DATA_LEN @@ -146,7 +149,6 @@ static const struct { const char *fw_name; } rtl_chip_infos[] = { /* PCI devices. */ - [RTL_GIGA_MAC_VER_01] = {"RTL8169" }, [RTL_GIGA_MAC_VER_02] = {"RTL8169s" }, [RTL_GIGA_MAC_VER_03] = {"RTL8110s" }, [RTL_GIGA_MAC_VER_04] = {"RTL8169sb/8110sb" }, @@ -155,7 +157,7 @@ static const struct { /* PCI-E devices. 
*/ [RTL_GIGA_MAC_VER_07] = {"RTL8102e" }, [RTL_GIGA_MAC_VER_08] = {"RTL8102e" }, - [RTL_GIGA_MAC_VER_09] = {"RTL8102e" }, + [RTL_GIGA_MAC_VER_09] = {"RTL8102e/RTL8103e" }, [RTL_GIGA_MAC_VER_10] = {"RTL8101e" }, [RTL_GIGA_MAC_VER_11] = {"RTL8168b/8111b" }, [RTL_GIGA_MAC_VER_12] = {"RTL8168b/8111b" }, @@ -188,9 +190,9 @@ static const struct { [RTL_GIGA_MAC_VER_39] = {"RTL8106e", FIRMWARE_8106E_1}, [RTL_GIGA_MAC_VER_40] = {"RTL8168g/8111g", FIRMWARE_8168G_2}, [RTL_GIGA_MAC_VER_41] = {"RTL8168g/8111g" }, - [RTL_GIGA_MAC_VER_42] = {"RTL8168g/8111g", FIRMWARE_8168G_3}, - [RTL_GIGA_MAC_VER_43] = {"RTL8106e", FIRMWARE_8106E_2}, - [RTL_GIGA_MAC_VER_44] = {"RTL8411", FIRMWARE_8411_2 }, + [RTL_GIGA_MAC_VER_42] = {"RTL8168gu/8111gu", FIRMWARE_8168G_3}, + [RTL_GIGA_MAC_VER_43] = {"RTL8106eus", FIRMWARE_8106E_2}, + [RTL_GIGA_MAC_VER_44] = {"RTL8411b", FIRMWARE_8411_2 }, [RTL_GIGA_MAC_VER_45] = {"RTL8168h/8111h", FIRMWARE_8168H_1}, [RTL_GIGA_MAC_VER_46] = {"RTL8168h/8111h", FIRMWARE_8168H_2}, [RTL_GIGA_MAC_VER_47] = {"RTL8107e", FIRMWARE_8107E_1}, @@ -200,32 +202,24 @@ static const struct { [RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep" }, }; -enum cfg_version { - RTL_CFG_0 = 0x00, - RTL_CFG_1, - RTL_CFG_2 -}; - static const struct pci_device_id rtl8169_pci_tbl[] = { - { PCI_VDEVICE(REALTEK, 0x2502), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x2600), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x8129), RTL_CFG_0 }, - { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_2 }, - { PCI_VDEVICE(REALTEK, 0x8161), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x8167), RTL_CFG_0 }, - { PCI_VDEVICE(REALTEK, 0x8168), RTL_CFG_1 }, - { PCI_VDEVICE(NCUBE, 0x8168), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x8169), RTL_CFG_0 }, + { PCI_VDEVICE(REALTEK, 0x2502) }, + { PCI_VDEVICE(REALTEK, 0x2600) }, + { PCI_VDEVICE(REALTEK, 0x8129) }, + { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT }, + { PCI_VDEVICE(REALTEK, 0x8161) }, + { PCI_VDEVICE(REALTEK, 0x8167) }, + { PCI_VDEVICE(REALTEK, 0x8168) }, + { PCI_VDEVICE(NCUBE, 0x8168) }, + { PCI_VDEVICE(REALTEK, 0x8169) }, { PCI_VENDOR_ID_DLINK, 0x4300, - PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0, RTL_CFG_1 }, - { PCI_VDEVICE(DLINK, 0x4300), RTL_CFG_0 }, - { PCI_VDEVICE(DLINK, 0x4302), RTL_CFG_0 }, - { PCI_VDEVICE(AT, 0xc107), RTL_CFG_0 }, - { PCI_VDEVICE(USR, 0x0116), RTL_CFG_0 }, - { PCI_VENDOR_ID_LINKSYS, 0x1032, - PCI_ANY_ID, 0x0024, 0, 0, RTL_CFG_0 }, - { 0x0001, 0x8168, - PCI_ANY_ID, 0x2410, 0, 0, RTL_CFG_2 }, + PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0 }, + { PCI_VDEVICE(DLINK, 0x4300) }, + { PCI_VDEVICE(DLINK, 0x4302) }, + { PCI_VDEVICE(AT, 0xc107) }, + { PCI_VDEVICE(USR, 0x0116) }, + { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0024 }, + { 0x0001, 0x8168, PCI_ANY_ID, 0x2410 }, {} }; @@ -406,8 +400,6 @@ enum rtl_register_content { RxOK = 0x0001, /* RxStatusDesc */ - RxBOVF = (1 << 24), - RxFOVF = (1 << 23), RxRWT = (1 << 22), RxRES = (1 << 21), RxRUNT = (1 << 20), @@ -492,6 +484,7 @@ enum rtl_register_content { PCIDAC = (1 << 4), PCIMulRW = (1 << 3), #define INTT_MASK GENMASK(1, 0) +#define CPCMD_MASK (Normal_mode | RxVlan | RxChkSum | INTT_MASK) /* rtl8169_PHYstatus */ TBI_Enable = 0x80, @@ -503,9 +496,6 @@ enum rtl_register_content { LinkStatus = 0x02, FullDup = 0x01, - /* _TBICSRBit */ - TBILinkOK = 0x02000000, - /* ResetCounterCommand */ CounterReset = 0x1, @@ -578,7 +568,6 @@ enum rtl_rx_desc_bit { }; #define RsvdMask 0x3fffc000 -#define CPCMD_QUIRK_MASK (Normal_mode | RxVlan | RxChkSum | INTT_MASK) struct TxDesc { __le32 opts1; @@ -639,7 +628,7 @@ struct rtl8169_private { struct phy_device *phydev; struct napi_struct 
napi; u32 msg_enable; - u16 mac_version; + enum mac_version mac_version; u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */ u32 dirty_tx; @@ -652,24 +641,9 @@ struct rtl8169_private { void *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */ struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */ u16 cp_cmd; - u16 irq_mask; - const struct rtl_coalesce_info *coalesce_info; struct clk *clk; - struct mdio_ops { - void (*write)(struct rtl8169_private *, int, int); - int (*read)(struct rtl8169_private *, int); - } mdio_ops; - - struct jumbo_ops { - void (*enable)(struct rtl8169_private *); - void (*disable)(struct rtl8169_private *); - } jumbo_ops; - - void (*hw_start)(struct rtl8169_private *tp); - bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *); - struct { DECLARE_BITMAP(flags, RTL_FLAG_MAX); struct mutex mutex; @@ -678,24 +652,14 @@ struct rtl8169_private { unsigned irq_enabled:1; unsigned supports_gmii:1; + unsigned aspm_manageable:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; u32 saved_wolopts; const char *fw_name; - struct rtl_fw { - const struct firmware *fw; - -#define RTL_VER_SIZE 32 - - char version[RTL_VER_SIZE]; - - struct rtl_fw_phy_action { - __le32 *code; - size_t size; - } phy_action; - } *rtl_fw; + struct rtl_fw *rtl_fw; u32 ocp_base; }; @@ -759,6 +723,12 @@ static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force) PCI_EXP_DEVCTL_READRQ, force); } +static bool rtl_is_8168evl_up(struct rtl8169_private *tp) +{ + return tp->mac_version >= RTL_GIGA_MAC_VER_34 && + tp->mac_version != RTL_GIGA_MAC_VER_39; +} + struct rtl_cond { bool (*check)(struct rtl8169_private *); const char *msg; @@ -847,7 +817,7 @@ static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10); } -static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) +static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) { if (rtl_ocp_reg_failure(tp, reg)) return 0; @@ -855,7 +825,7 @@ static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) RTL_W32(tp, GPHY_OCP, reg << 15); return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ? - (RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0; + (RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT; } static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) @@ -938,7 +908,7 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg) RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16); value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ? - RTL_R32(tp, PHYAR) & 0xffff : ~0; + RTL_R32(tp, PHYAR) & 0xffff : -ETIMEDOUT; /* * According to hardware specs a 20us delay is required after read @@ -978,7 +948,7 @@ static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg) RTL_W32(tp, EPHY_RXER_NUM, 0); return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ? 
- RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0; + RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : -ETIMEDOUT; } #define R8168DP_1_MDIO_ACCESS_BIT 0x00020000 @@ -1015,14 +985,38 @@ static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg) return value; } -static void rtl_writephy(struct rtl8169_private *tp, int location, u32 val) +static void rtl_writephy(struct rtl8169_private *tp, int location, int val) { - tp->mdio_ops.write(tp, location, val); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_27: + r8168dp_1_mdio_write(tp, location, val); + break; + case RTL_GIGA_MAC_VER_28: + case RTL_GIGA_MAC_VER_31: + r8168dp_2_mdio_write(tp, location, val); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + r8168g_mdio_write(tp, location, val); + break; + default: + r8169_mdio_write(tp, location, val); + break; + } } static int rtl_readphy(struct rtl8169_private *tp, int location) { - return tp->mdio_ops.read(tp, location); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_27: + return r8168dp_1_mdio_read(tp, location); + case RTL_GIGA_MAC_VER_28: + case RTL_GIGA_MAC_VER_31: + return r8168dp_2_mdio_read(tp, location); + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + return r8168g_mdio_read(tp, location); + default: + return r8169_mdio_read(tp, location); + } } static void rtl_patchphy(struct rtl8169_private *tp, int reg_addr, int value) @@ -1400,9 +1394,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) rtl_unlock_config_regs(tp); - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + if (rtl_is_8168evl_up(tp)) { tmp = ARRAY_SIZE(cfg) - 1; if (wolopts & WAKE_MAGIC) rtl_eri_set_bits(tp, 0x0dc, ERIAR_MASK_0100, @@ -1410,10 +1402,8 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) else rtl_eri_clear_bits(tp, 0x0dc, ERIAR_MASK_0100, MagicPacket_v2); - break; - default: + } else { tmp = ARRAY_SIZE(cfg); - break; } for (i = 0; i < tmp; i++) { @@ -1424,7 +1414,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) } switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17: + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_17: options = RTL_R8(tp, Config1) & ~PMEnable; if (wolopts) options |= PMEnable; @@ -1794,18 +1784,16 @@ static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = { static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); - struct ethtool_link_ksettings ecmd; const struct rtl_coalesce_info *ci; - int rc; - rc = phy_ethtool_get_link_ksettings(dev, &ecmd); - if (rc < 0) - return ERR_PTR(rc); + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) + ci = rtl_coalesce_info_8169; + else + ci = rtl_coalesce_info_8168_8136; - for (ci = tp->coalesce_info; ci->speed != 0; ci++) { - if (ecmd.base.speed == ci->speed) { + for (; ci->speed; ci++) { + if (tp->phydev->speed == ci->speed) return ci; - } } return ERR_PTR(-ELNRNG); @@ -1954,9 +1942,7 @@ static int rtl_get_eee_supp(struct rtl8169_private *tp) ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE); break; case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5c); - ret = phy_read(phydev, 0x12); - phy_write(phydev, 0x1f, 0x0000); + ret = phy_read_paged(phydev, 0x0a5c, 0x12); break; default: ret = -EPROTONOSUPPORT; @@ -1979,9 +1965,7 @@ static int rtl_get_eee_lpadv(struct rtl8169_private *tp) ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5d); - ret = phy_read(phydev, 0x11); - phy_write(phydev, 0x1f, 0x0000); + ret = phy_read_paged(phydev, 0x0a5d, 0x11); break; default: ret = -EPROTONOSUPPORT; @@ -2004,9 +1988,7 @@ static int rtl_get_eee_adv(struct rtl8169_private *tp) ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5d); - ret = phy_read(phydev, 0x10); - phy_write(phydev, 0x1f, 0x0000); + ret = phy_read_paged(phydev, 0x0a5d, 0x10); break; default: ret = -EPROTONOSUPPORT; @@ -2029,9 +2011,7 @@ static int rtl_set_eee_adv(struct rtl8169_private *tp, int val) ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, val); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5d); - phy_write(phydev, 0x10, val); - phy_write(phydev, 0x1f, 0x0000); + phy_write_paged(phydev, 0x0a5d, 0x10, val); break; default: ret = -EPROTONOSUPPORT; @@ -2252,7 +2232,6 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp) { 0xfc8, 0x100, RTL_GIGA_MAC_VER_04 }, { 0xfc8, 0x040, RTL_GIGA_MAC_VER_03 }, { 0xfc8, 0x008, RTL_GIGA_MAC_VER_02 }, - { 0xfc8, 0x000, RTL_GIGA_MAC_VER_01 }, /* Catch-all */ { 0x000, 0x000, RTL_GIGA_MAC_NONE } @@ -2292,246 +2271,10 @@ static void __rtl_writephy_batch(struct rtl8169_private *tp, #define rtl_writephy_batch(tp, a) __rtl_writephy_batch(tp, a, ARRAY_SIZE(a)) -#define PHY_READ 0x00000000 -#define PHY_DATA_OR 0x10000000 -#define PHY_DATA_AND 0x20000000 -#define PHY_BJMPN 0x30000000 -#define PHY_MDIO_CHG 0x40000000 -#define PHY_CLEAR_READCOUNT 0x70000000 -#define PHY_WRITE 0x80000000 -#define PHY_READCOUNT_EQ_SKIP 0x90000000 -#define PHY_COMP_EQ_SKIPN 0xa0000000 -#define PHY_COMP_NEQ_SKIPN 0xb0000000 -#define PHY_WRITE_PREVIOUS 0xc0000000 -#define PHY_SKIPN 0xd0000000 -#define PHY_DELAY_MS 0xe0000000 - -struct fw_info { - u32 magic; - char version[RTL_VER_SIZE]; - __le32 fw_start; - __le32 fw_len; - u8 chksum; -} __packed; - -#define FW_OPCODE_SIZE sizeof(typeof(*((struct rtl_fw_phy_action *)0)->code)) - -static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) -{ - const struct firmware *fw = rtl_fw->fw; - struct fw_info *fw_info = (struct fw_info *)fw->data; - struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; - char *version = rtl_fw->version; - bool rc = false; - - if (fw->size < FW_OPCODE_SIZE) - goto out; - - if (!fw_info->magic) { - size_t i, size, start; - u8 checksum = 0; - - if (fw->size < sizeof(*fw_info)) - goto out; - - for (i = 0; i < fw->size; i++) - checksum += fw->data[i]; - if (checksum != 0) - goto out; - - start = le32_to_cpu(fw_info->fw_start); - if (start > fw->size) - goto out; - - size = le32_to_cpu(fw_info->fw_len); - if (size > (fw->size - start) / FW_OPCODE_SIZE) - goto out; - - memcpy(version, fw_info->version, RTL_VER_SIZE); - - pa->code = (__le32 *)(fw->data + start); - pa->size = size; - } else { - if (fw->size % FW_OPCODE_SIZE) - goto out; - - strlcpy(version, tp->fw_name, RTL_VER_SIZE); - - pa->code = (__le32 *)fw->data; - pa->size = fw->size / FW_OPCODE_SIZE; - } - version[RTL_VER_SIZE - 1] = 0; - 
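
The PHY firmware opcode table and its format/sanity checking removed in this hunk (together with rtl_phy_write_fw below) move out of the main driver into a dedicated firmware unit: the driver now only fills a struct rtl_fw with its own register accessors and calls rtl_fw_request_firmware()/rtl_fw_write_firmware()/rtl_fw_release_firmware(), as the rtl_request_firmware() and rtl_release_firmware() hunks later in this patch show. A sketch of the interface those call sites imply — the function names and the fields assigned in this diff are taken from the patch itself, the accessor typedefs and remaining layout are assumptions:

    /* Sketch of the split-out firmware interface. The accessor typedefs are
     * inferred from rtl_writephy()/rtl_readphy(), which the driver plugs in.
     */
    typedef void (*rtl_fw_write_t)(struct rtl8169_private *tp, int reg, int val);
    typedef int  (*rtl_fw_read_t)(struct rtl8169_private *tp, int reg);

    struct rtl_fw {
            rtl_fw_write_t phy_write;       /* set to rtl_writephy */
            rtl_fw_read_t  phy_read;        /* set to rtl_readphy */
            rtl_fw_write_t mac_mcu_write;   /* set to mac_mcu_write */
            rtl_fw_read_t  mac_mcu_read;    /* set to mac_mcu_read */
            const char *fw_name;
            struct device *dev;
            /* parsed opcode stream, assumed to stay private to the new unit */
    };

    int  rtl_fw_request_firmware(struct rtl_fw *rtl_fw);   /* nonzero on failure */
    void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw);
    void rtl_fw_release_firmware(struct rtl_fw *rtl_fw);

With the accessors injected this way, the PHY_MDIO_CHG opcode no longer has to patch tp->mdio_ops at run time, which is what lets the rest of the patch delete that indirection and dispatch MDIO accesses by mac_version instead.
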
- rc = true; -out: - return rc; -} - -static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev, - struct rtl_fw_phy_action *pa) -{ - bool rc = false; - size_t index; - - for (index = 0; index < pa->size; index++) { - u32 action = le32_to_cpu(pa->code[index]); - u32 regno = (action & 0x0fff0000) >> 16; - - switch(action & 0xf0000000) { - case PHY_READ: - case PHY_DATA_OR: - case PHY_DATA_AND: - case PHY_MDIO_CHG: - case PHY_CLEAR_READCOUNT: - case PHY_WRITE: - case PHY_WRITE_PREVIOUS: - case PHY_DELAY_MS: - break; - - case PHY_BJMPN: - if (regno > index) { - netif_err(tp, ifup, tp->dev, - "Out of range of firmware\n"); - goto out; - } - break; - case PHY_READCOUNT_EQ_SKIP: - if (index + 2 >= pa->size) { - netif_err(tp, ifup, tp->dev, - "Out of range of firmware\n"); - goto out; - } - break; - case PHY_COMP_EQ_SKIPN: - case PHY_COMP_NEQ_SKIPN: - case PHY_SKIPN: - if (index + 1 + regno >= pa->size) { - netif_err(tp, ifup, tp->dev, - "Out of range of firmware\n"); - goto out; - } - break; - - default: - netif_err(tp, ifup, tp->dev, - "Invalid action 0x%08x\n", action); - goto out; - } - } - rc = true; -out: - return rc; -} - -static int rtl_check_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) -{ - struct net_device *dev = tp->dev; - int rc = -EINVAL; - - if (!rtl_fw_format_ok(tp, rtl_fw)) { - netif_err(tp, ifup, dev, "invalid firmware\n"); - goto out; - } - - if (rtl_fw_data_ok(tp, dev, &rtl_fw->phy_action)) - rc = 0; -out: - return rc; -} - -static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) -{ - struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; - struct mdio_ops org, *ops = &tp->mdio_ops; - u32 predata, count; - size_t index; - - predata = count = 0; - org.write = ops->write; - org.read = ops->read; - - for (index = 0; index < pa->size; ) { - u32 action = le32_to_cpu(pa->code[index]); - u32 data = action & 0x0000ffff; - u32 regno = (action & 0x0fff0000) >> 16; - - if (!action) - break; - - switch(action & 0xf0000000) { - case PHY_READ: - predata = rtl_readphy(tp, regno); - count++; - index++; - break; - case PHY_DATA_OR: - predata |= data; - index++; - break; - case PHY_DATA_AND: - predata &= data; - index++; - break; - case PHY_BJMPN: - index -= regno; - break; - case PHY_MDIO_CHG: - if (data == 0) { - ops->write = org.write; - ops->read = org.read; - } else if (data == 1) { - ops->write = mac_mcu_write; - ops->read = mac_mcu_read; - } - - index++; - break; - case PHY_CLEAR_READCOUNT: - count = 0; - index++; - break; - case PHY_WRITE: - rtl_writephy(tp, regno, data); - index++; - break; - case PHY_READCOUNT_EQ_SKIP: - index += (count == data) ? 2 : 1; - break; - case PHY_COMP_EQ_SKIPN: - if (predata == data) - index += regno; - index++; - break; - case PHY_COMP_NEQ_SKIPN: - if (predata != data) - index += regno; - index++; - break; - case PHY_WRITE_PREVIOUS: - rtl_writephy(tp, regno, predata); - index++; - break; - case PHY_SKIPN: - index += regno + 1; - break; - case PHY_DELAY_MS: - mdelay(data); - index++; - break; - - default: - BUG(); - } - } - - ops->write = org.write; - ops->read = org.read; -} - static void rtl_release_firmware(struct rtl8169_private *tp) { if (tp->rtl_fw) { - release_firmware(tp->rtl_fw->fw); + rtl_fw_release_firmware(tp->rtl_fw); kfree(tp->rtl_fw); tp->rtl_fw = NULL; } @@ -2539,9 +2282,9 @@ static void rtl_release_firmware(struct rtl8169_private *tp) static void rtl_apply_firmware(struct rtl8169_private *tp) { - /* TODO: release firmware once rtl_phy_write_fw signals failures. 
*/ + /* TODO: release firmware if rtl_fw_write_firmware signals failure. */ if (tp->rtl_fw) - rtl_phy_write_fw(tp, tp->rtl_fw); + rtl_fw_write_firmware(tp, tp->rtl_fw); } static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) @@ -2578,9 +2321,7 @@ static void rtl8168f_config_eee_phy(struct rtl8169_private *tp) static void rtl8168g_config_eee_phy(struct rtl8169_private *tp) { - phy_write(tp->phydev, 0x1f, 0x0a43); - phy_set_bits(tp->phydev, 0x11, BIT(4)); - phy_write(tp->phydev, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a43, 0x11, 0, BIT(4)); } static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) @@ -2910,50 +2651,59 @@ static void rtl8168c_4_hw_phy_config(struct rtl8169_private *tp) rtl8168c_3_hw_phy_config(tp); } -static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init_0[] = { - /* Channel Estimation */ - { 0x1f, 0x0001 }, - { 0x06, 0x4064 }, - { 0x07, 0x2863 }, - { 0x08, 0x059c }, - { 0x09, 0x26b4 }, - { 0x0a, 0x6a19 }, - { 0x0b, 0xdcc8 }, - { 0x10, 0xf06d }, - { 0x14, 0x7f68 }, - { 0x18, 0x7fd9 }, - { 0x1c, 0xf0ff }, - { 0x1d, 0x3d9c }, - { 0x1f, 0x0003 }, - { 0x12, 0xf49f }, - { 0x13, 0x070b }, - { 0x1a, 0x05ad }, - { 0x14, 0x94c0 }, +static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { + /* Channel Estimation */ + { 0x1f, 0x0001 }, + { 0x06, 0x4064 }, + { 0x07, 0x2863 }, + { 0x08, 0x059c }, + { 0x09, 0x26b4 }, + { 0x0a, 0x6a19 }, + { 0x0b, 0xdcc8 }, + { 0x10, 0xf06d }, + { 0x14, 0x7f68 }, + { 0x18, 0x7fd9 }, + { 0x1c, 0xf0ff }, + { 0x1d, 0x3d9c }, + { 0x1f, 0x0003 }, + { 0x12, 0xf49f }, + { 0x13, 0x070b }, + { 0x1a, 0x05ad }, + { 0x14, 0x94c0 }, - /* - * Tx Error Issue - * Enhance line driver power - */ - { 0x1f, 0x0002 }, - { 0x06, 0x5561 }, - { 0x1f, 0x0005 }, - { 0x05, 0x8332 }, - { 0x06, 0x5561 }, + /* + * Tx Error Issue + * Enhance line driver power + */ + { 0x1f, 0x0002 }, + { 0x06, 0x5561 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8332 }, + { 0x06, 0x5561 }, - /* - * Can not link to 1Gbps with bad cable - * Decrease SNR threshold form 21.07dB to 19.04dB - */ - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, + /* + * Can not link to 1Gbps with bad cable + * Decrease SNR threshold form 21.07dB to 19.04dB + */ + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 } - }; + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 } +}; - rtl_writephy_batch(tp, phy_reg_init_0); +static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x669a }, + { 0x1f, 0x0005 }, + { 0x05, 0x8330 }, + { 0x06, 0x669a }, + { 0x1f, 0x0002 } +}; + +static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) +{ + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); /* * Rx Error Issue @@ -2964,17 +2714,9 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - { 0x1f, 0x0002 } - }; int val; - rtl_writephy_batch(tp, phy_reg_init); + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); val = rtl_readphy(tp, 0x0d); @@ -3023,62 +2765,12 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp) { - static const struct phy_reg phy_reg_init_0[] = { - /* Channel Estimation */ - { 0x1f, 0x0001 }, - { 0x06, 0x4064 }, - { 0x07, 0x2863 }, - { 0x08, 0x059c }, - { 0x09, 0x26b4 }, - 
{ 0x0a, 0x6a19 }, - { 0x0b, 0xdcc8 }, - { 0x10, 0xf06d }, - { 0x14, 0x7f68 }, - { 0x18, 0x7fd9 }, - { 0x1c, 0xf0ff }, - { 0x1d, 0x3d9c }, - { 0x1f, 0x0003 }, - { 0x12, 0xf49f }, - { 0x13, 0x070b }, - { 0x1a, 0x05ad }, - { 0x14, 0x94c0 }, - - /* - * Tx Error Issue - * Enhance line driver power - */ - { 0x1f, 0x0002 }, - { 0x06, 0x5561 }, - { 0x1f, 0x0005 }, - { 0x05, 0x8332 }, - { 0x06, 0x5561 }, - - /* - * Can not link to 1Gbps with bad cable - * Decrease SNR threshold form 21.07dB to 19.04dB - */ - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 } - }; - - rtl_writephy_batch(tp, phy_reg_init_0); + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - - { 0x1f, 0x0002 } - }; int val; - rtl_writephy_batch(tp, phy_reg_init); + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); val = rtl_readphy(tp, 0x0d); if ((val & 0x00ff) != 0x006c) { @@ -3528,20 +3220,15 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp) static void rtl8168g_disable_aldps(struct rtl8169_private *tp) { - phy_write(tp->phydev, 0x1f, 0x0a43); - phy_clear_bits(tp->phydev, 0x10, BIT(2)); + phy_modify_paged(tp->phydev, 0x0a43, 0x10, BIT(2), 0); } static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp) { struct phy_device *phydev = tp->phydev; - phy_write(phydev, 0x1f, 0x0bcc); - phy_clear_bits(phydev, 0x14, BIT(8)); - - phy_write(phydev, 0x1f, 0x0a44); - phy_set_bits(phydev, 0x11, BIT(7) | BIT(6)); - + phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0); + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6)); phy_write(phydev, 0x1f, 0x0a43); phy_write(phydev, 0x13, 0x8084); phy_clear_bits(phydev, 0x14, BIT(14) | BIT(13)); @@ -3552,43 +3239,36 @@ static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp) static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) { + int ret; + rtl_apply_firmware(tp); - rtl_writephy(tp, 0x1f, 0x0a46); - if (rtl_readphy(tp, 0x10) & 0x0100) { - rtl_writephy(tp, 0x1f, 0x0bcc); - rtl_w0w1_phy(tp, 0x12, 0x0000, 0x8000); - } else { - rtl_writephy(tp, 0x1f, 0x0bcc); - rtl_w0w1_phy(tp, 0x12, 0x8000, 0x0000); - } + ret = phy_read_paged(tp->phydev, 0x0a46, 0x10); + if (ret & BIT(8)) + phy_modify_paged(tp->phydev, 0x0bcc, 0x12, BIT(15), 0); + else + phy_modify_paged(tp->phydev, 0x0bcc, 0x12, 0, BIT(15)); - rtl_writephy(tp, 0x1f, 0x0a46); - if (rtl_readphy(tp, 0x13) & 0x0100) { - rtl_writephy(tp, 0x1f, 0x0c41); - rtl_w0w1_phy(tp, 0x15, 0x0002, 0x0000); - } else { - rtl_writephy(tp, 0x1f, 0x0c41); - rtl_w0w1_phy(tp, 0x15, 0x0000, 0x0002); - } + ret = phy_read_paged(tp->phydev, 0x0a46, 0x13); + if (ret & BIT(8)) + phy_modify_paged(tp->phydev, 0x0c41, 0x12, 0, BIT(1)); + else + phy_modify_paged(tp->phydev, 0x0c41, 0x12, BIT(1), 0); /* Enable PHY auto speed down */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); rtl8168g_phy_adjust_10m_aldps(tp); /* EEE auto-fallback function */ - rtl_writephy(tp, 0x1f, 0x0a4b); - rtl_w0w1_phy(tp, 0x11, 0x0004, 0x0000); + phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2)); /* Enable UC LPF tune function */ rtl_writephy(tp, 0x1f, 0x0a43); rtl_writephy(tp, 0x13, 0x8012); rtl_w0w1_phy(tp, 0x14, 0x8000, 0x0000); - rtl_writephy(tp, 0x1f, 0x0c42); - rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000); + phy_modify_paged(tp->phydev, 
0x0c42, 0x11, BIT(13), BIT(14)); /* Improve SWR Efficiency */ rtl_writephy(tp, 0x1f, 0x0bcd); @@ -3600,6 +3280,7 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x14, 0x1065); rtl_writephy(tp, 0x14, 0x9065); rtl_writephy(tp, 0x14, 0x1065); + rtl_writephy(tp, 0x1f, 0x0000); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3684,14 +3365,10 @@ static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* enable GPHY 10M */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0800, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); /* SAR ADC performance */ - rtl_writephy(tp, 0x1f, 0x0bca); - rtl_w0w1_phy(tp, 0x17, 0x4000, 0x3000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14)); rtl_writephy(tp, 0x1f, 0x0a43); rtl_writephy(tp, 0x13, 0x803f); @@ -3711,9 +3388,7 @@ static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* disable phy pfm mode */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3743,9 +3418,7 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* enable GPHY 10M */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0800, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); r8168_mac_ocp_write(tp, 0xdd02, 0x807d); data = r8168_mac_ocp_read(tp, 0xdd02); @@ -3781,9 +3454,7 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* disable phy pfm mode */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3793,16 +3464,12 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) { /* Enable PHY auto speed down */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); rtl8168g_phy_adjust_10m_aldps(tp); /* Enable EEE auto-fallback function */ - rtl_writephy(tp, 0x1f, 0x0a4b); - rtl_w0w1_phy(tp, 0x11, 0x0004, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2)); /* Enable UC LPF tune function */ rtl_writephy(tp, 0x1f, 0x0a43); @@ -3811,9 +3478,7 @@ static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* set rg_sel_sdm_rate */ - rtl_writephy(tp, 0x1f, 0x0c42); - rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3831,9 +3496,7 @@ static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* Set rg_sel_sdm_rate */ - rtl_writephy(tp, 0x1f, 0x0c42); - rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); /* Channel estimation parameters */ rtl_writephy(tp, 0x1f, 0x0a43); @@ -3985,7 +3648,6 @@ static void 
rtl_hw_phy_config(struct net_device *dev) { static const rtl_generic_fct phy_configs[] = { /* PCI devices. */ - [RTL_GIGA_MAC_VER_01] = NULL, [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config, [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config, [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config, @@ -4050,12 +3712,6 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) schedule_work(&tp->wk.work); } -static bool rtl_tbi_enabled(struct rtl8169_private *tp) -{ - return (tp->mac_version == RTL_GIGA_MAC_VER_01) && - (RTL_R8(tp, PHYstatus) & TBI_Enable); -} - static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) { rtl_hw_phy_config(dev); @@ -4124,31 +3780,6 @@ static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return phy_mii_ioctl(tp->phydev, ifr, cmd); } -static void rtl_init_mdio_ops(struct rtl8169_private *tp) -{ - struct mdio_ops *ops = &tp->mdio_ops; - - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_27: - ops->write = r8168dp_1_mdio_write; - ops->read = r8168dp_1_mdio_read; - break; - case RTL_GIGA_MAC_VER_28: - case RTL_GIGA_MAC_VER_31: - ops->write = r8168dp_2_mdio_write; - ops->read = r8168dp_2_mdio_read; - break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - ops->write = r8168g_mdio_write; - ops->read = r8168g_mdio_read; - break; - default: - ops->write = r8169_mdio_write; - ops->read = r8169_mdio_read; - break; - } -} - static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -4168,7 +3799,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) } } -static void r8168_pll_power_down(struct rtl8169_private *tp) +static void rtl_pll_power_down(struct rtl8169_private *tp) { if (r8168_check_dash(tp)) return; @@ -4203,10 +3834,12 @@ static void r8168_pll_power_down(struct rtl8169_private *tp) rtl_eri_clear_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000); RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); break; + default: + break; } } -static void r8168_pll_power_up(struct rtl8169_private *tp) +static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: @@ -4230,6 +3863,8 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0); rtl_eri_set_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000); break; + default: + break; } phy_resume(tp->phydev); @@ -4237,32 +3872,10 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) msleep(20); } -static void rtl_pll_power_down(struct rtl8169_private *tp) -{ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15: - break; - default: - r8168_pll_power_down(tp); - } -} - -static void rtl_pll_power_up(struct rtl8169_private *tp) -{ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15: - break; - default: - r8168_pll_power_up(tp); - } -} - static void rtl_init_rxcfg(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: case RTL_GIGA_MAC_VER_10 ... 
RTL_GIGA_MAC_VER_17: RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); break; @@ -4285,24 +3898,6 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp) tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0; } -static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) -{ - if (tp->jumbo_ops.enable) { - rtl_unlock_config_regs(tp); - tp->jumbo_ops.enable(tp); - rtl_lock_config_regs(tp); - } -} - -static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) -{ - if (tp->jumbo_ops.disable) { - rtl_unlock_config_regs(tp); - tp->jumbo_ops.disable(tp); - rtl_lock_config_regs(tp); - } -} - static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0); @@ -4369,55 +3964,56 @@ static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp) RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0)); } -static void rtl_init_jumbo_ops(struct rtl8169_private *tp) +static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) { - struct jumbo_ops *ops = &tp->jumbo_ops; - + rtl_unlock_config_regs(tp); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_11: - ops->disable = r8168b_0_hw_jumbo_disable; - ops->enable = r8168b_0_hw_jumbo_enable; + r8168b_0_hw_jumbo_enable(tp); break; case RTL_GIGA_MAC_VER_12: case RTL_GIGA_MAC_VER_17: - ops->disable = r8168b_1_hw_jumbo_disable; - ops->enable = r8168b_1_hw_jumbo_enable; + r8168b_1_hw_jumbo_enable(tp); break; - case RTL_GIGA_MAC_VER_18: /* Wild guess. Needs info from Realtek. */ - case RTL_GIGA_MAC_VER_19: - case RTL_GIGA_MAC_VER_20: - case RTL_GIGA_MAC_VER_21: /* Wild guess. Needs info from Realtek. */ - case RTL_GIGA_MAC_VER_22: - case RTL_GIGA_MAC_VER_23: - case RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - ops->disable = r8168c_hw_jumbo_disable; - ops->enable = r8168c_hw_jumbo_enable; + case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: + r8168c_hw_jumbo_enable(tp); break; - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: - ops->disable = r8168dp_hw_jumbo_disable; - ops->enable = r8168dp_hw_jumbo_enable; + case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28: + r8168dp_hw_jumbo_enable(tp); break; - case RTL_GIGA_MAC_VER_31: /* Wild guess. Needs info from Realtek. */ - case RTL_GIGA_MAC_VER_32: - case RTL_GIGA_MAC_VER_33: - case RTL_GIGA_MAC_VER_34: - ops->disable = r8168e_hw_jumbo_disable; - ops->enable = r8168e_hw_jumbo_enable; + case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_34: + r8168e_hw_jumbo_enable(tp); break; + default: + break; + } + rtl_lock_config_regs(tp); +} - /* - * No action needed for jumbo frames with 8169. - * No jumbo for 810x at all. - */ - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: +static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) +{ + rtl_unlock_config_regs(tp); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_11: + r8168b_0_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_12: + case RTL_GIGA_MAC_VER_17: + r8168b_1_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: + r8168c_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28: + r8168dp_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_31 ... 
RTL_GIGA_MAC_VER_34: + r8168e_hw_jumbo_disable(tp); + break; default: - ops->disable = NULL; - ops->enable = NULL; break; } + rtl_lock_config_regs(tp); } DECLARE_RTL_COND(rtl_chipcmd_cond) @@ -4435,35 +4031,28 @@ static void rtl_hw_reset(struct rtl8169_private *tp) static void rtl_request_firmware(struct rtl8169_private *tp) { struct rtl_fw *rtl_fw; - int rc = -ENOMEM; /* firmware loaded already or no firmware available */ if (tp->rtl_fw || !tp->fw_name) return; rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL); - if (!rtl_fw) - goto err_warn; - - rc = request_firmware(&rtl_fw->fw, tp->fw_name, tp_to_dev(tp)); - if (rc < 0) - goto err_free; - - rc = rtl_check_firmware(tp, rtl_fw); - if (rc < 0) - goto err_release_firmware; - - tp->rtl_fw = rtl_fw; + if (!rtl_fw) { + netif_warn(tp, ifup, tp->dev, "Unable to load firmware, out of memory\n"); + return; + } - return; + rtl_fw->phy_write = rtl_writephy; + rtl_fw->phy_read = rtl_readphy; + rtl_fw->mac_mcu_write = mac_mcu_write; + rtl_fw->mac_mcu_read = mac_mcu_read; + rtl_fw->fw_name = tp->fw_name; + rtl_fw->dev = tp_to_dev(tp); -err_release_firmware: - release_firmware(rtl_fw->fw); -err_free: - kfree(rtl_fw); -err_warn: - netif_warn(tp, ifup, tp->dev, "unable to load firmware patch %s (%d)\n", - tp->fw_name, rc); + if (rtl_fw_request_firmware(rtl_fw)) + kfree(rtl_fw); + else + tp->rtl_fw = rtl_fw; } static void rtl_rx_close(struct rtl8169_private *tp) @@ -4513,8 +4102,7 @@ static void rtl_set_tx_config_registers(struct rtl8169_private *tp) u32 val = TX_DMA_BURST << TxDMAShift | InterFrameGap << TxInterFrameGapShift; - if (tp->mac_version >= RTL_GIGA_MAC_VER_34 && - tp->mac_version != RTL_GIGA_MAC_VER_39) + if (rtl_is_8168evl_up(tp)) val |= TXCFG_AUTO_FIFO; RTL_W32(tp, TxConfig, val); @@ -4608,53 +4196,6 @@ static void rtl_set_rx_mode(struct net_device *dev) RTL_W32(tp, RxConfig, tmp); } -static void rtl_hw_start(struct rtl8169_private *tp) -{ - rtl_unlock_config_regs(tp); - - tp->hw_start(tp); - - rtl_set_rx_max_size(tp); - rtl_set_rx_tx_desc_registers(tp); - rtl_lock_config_regs(tp); - - /* disable interrupt coalescing */ - RTL_W16(tp, IntrMitigate, 0x0000); - /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ - RTL_R8(tp, IntrMask); - RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); - rtl_init_rxcfg(tp); - rtl_set_tx_config_registers(tp); - - rtl_set_rx_mode(tp->dev); - /* no early-rx interrupts */ - RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); - rtl_irq_enable(tp); -} - -static void rtl_hw_start_8169(struct rtl8169_private *tp) -{ - if (tp->mac_version == RTL_GIGA_MAC_VER_05) - pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); - - RTL_W8(tp, EarlyTxThres, NoEarlyTx); - - tp->cp_cmd |= PCIMulRW; - - if (tp->mac_version == RTL_GIGA_MAC_VER_02 || - tp->mac_version == RTL_GIGA_MAC_VER_03) { - netif_dbg(tp, drv, tp->dev, - "Set MAC Reg C+CR Offset 0xe0. 
Bit 3 and Bit 14 MUST be 1\n"); - tp->cp_cmd |= (1 << 14); - } - - RTL_W16(tp, CPlusCmd, tp->cp_cmd); - - rtl8169_set_magic_reg(tp, tp->mac_version); - - RTL_W32(tp, RxMissed, 0); -} - DECLARE_RTL_COND(rtl_csiar_cond) { return RTL_R32(tp, CSIAR) & CSIAR_FLAG; @@ -4746,7 +4287,8 @@ static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp) static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { - if (enable) { + /* Don't enable ASPM in the chip if OS can't control ASPM */ + if (enable && tp->aspm_manageable) { RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en); RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn); } else { @@ -4779,9 +4321,6 @@ static void rtl_hw_start_8168bb(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); - if (tp->dev->mtu <= ETH_DATA_LEN) { rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN); @@ -4792,8 +4331,6 @@ static void rtl_hw_start_8168bef(struct rtl8169_private *tp) { rtl_hw_start_8168bb(tp); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0)); } @@ -4807,9 +4344,6 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_disable_clock_request(tp); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp) @@ -4837,9 +4371,6 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp) if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) @@ -4851,13 +4382,8 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) /* Magic. 
*/ RTL_W8(tp, DBG_REG, 0x20); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168c_1(struct rtl8169_private *tp) @@ -4909,13 +4435,8 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168dp(struct rtl8169_private *tp) @@ -4925,8 +4446,6 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp) if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_disable_clock_request(tp); } @@ -4942,8 +4461,6 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_ephy_init(tp, e_info_8168d_4); rtl_enable_clock_request(tp); @@ -4974,8 +4491,6 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_disable_clock_request(tp); /* Reset tx FIFO pointer */ @@ -5007,8 +4522,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_0001, BIT(4)); rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00); - RTL_W8(tp, MaxTxPacketSize, EarlySize); - rtl_disable_clock_request(tp); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); @@ -5037,8 +4550,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050); rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060); - RTL_W8(tp, MaxTxPacketSize, EarlySize); - rtl_disable_clock_request(tp); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); @@ -5095,7 +4606,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp) rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN); - RTL_W8(tp, MaxTxPacketSize, EarlySize); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); @@ -5193,7 +4703,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN); - RTL_W8(tp, MaxTxPacketSize, EarlySize); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); @@ -5269,7 +4778,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN); - RTL_W8(tp, MaxTxPacketSize, EarlySize); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); @@ -5536,33 +5044,70 @@ static void rtl_hw_config(struct rtl8169_private *tp) static void rtl_hw_start_8168(struct rtl8169_private *tp) { - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); + if (tp->mac_version == RTL_GIGA_MAC_VER_13 || + tp->mac_version == RTL_GIGA_MAC_VER_16) + pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_NOSNOOP_EN); - /* Workaround for RxFIFO overflow. 
*/ - if (tp->mac_version == RTL_GIGA_MAC_VER_11) { - tp->irq_mask |= RxFIFOOver; - tp->irq_mask &= ~RxOverflow; - } + if (rtl_is_8168evl_up(tp)) + RTL_W8(tp, MaxTxPacketSize, EarlySize); + else + RTL_W8(tp, MaxTxPacketSize, TxPacketMax); rtl_hw_config(tp); } -static void rtl_hw_start_8101(struct rtl8169_private *tp) +static void rtl_hw_start_8169(struct rtl8169_private *tp) { - if (tp->mac_version >= RTL_GIGA_MAC_VER_30) - tp->irq_mask &= ~RxFIFOOver; + if (tp->mac_version == RTL_GIGA_MAC_VER_05) + pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); - if (tp->mac_version == RTL_GIGA_MAC_VER_13 || - tp->mac_version == RTL_GIGA_MAC_VER_16) - pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_NOSNOOP_EN); + RTL_W8(tp, EarlyTxThres, NoEarlyTx); + + tp->cp_cmd |= PCIMulRW; - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); + if (tp->mac_version == RTL_GIGA_MAC_VER_02 || + tp->mac_version == RTL_GIGA_MAC_VER_03) { + netif_dbg(tp, drv, tp->dev, + "Set MAC Reg C+CR Offset 0xe0. Bit 3 and Bit 14 MUST be 1\n"); + tp->cp_cmd |= (1 << 14); + } - tp->cp_cmd &= CPCMD_QUIRK_MASK; RTL_W16(tp, CPlusCmd, tp->cp_cmd); - rtl_hw_config(tp); + rtl8169_set_magic_reg(tp, tp->mac_version); + + RTL_W32(tp, RxMissed, 0); +} + +static void rtl_hw_start(struct rtl8169_private *tp) +{ + rtl_unlock_config_regs(tp); + + tp->cp_cmd &= CPCMD_MASK; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); + + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) + rtl_hw_start_8169(tp); + else + rtl_hw_start_8168(tp); + + rtl_set_rx_max_size(tp); + rtl_set_rx_tx_desc_registers(tp); + rtl_lock_config_regs(tp); + + /* disable interrupt coalescing */ + RTL_W16(tp, IntrMitigate, 0x0000); + /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ + RTL_R8(tp, IntrMask); + RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); + rtl_init_rxcfg(tp); + rtl_set_tx_config_registers(tp); + + rtl_set_rx_mode(tp->dev); + /* no early-rx interrupts */ + RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); + rtl_irq_enable(tp); } static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) @@ -5834,7 +5379,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, static void r8169_csum_workaround(struct rtl8169_private *tp, struct sk_buff *skb) { - if (skb_shinfo(skb)->gso_size) { + if (skb_is_gso(skb)) { netdev_features_t features = tp->dev->features; struct sk_buff *segs, *nskb; @@ -5857,11 +5402,8 @@ static void r8169_csum_workaround(struct rtl8169_private *tp, rtl8169_start_xmit(skb, tp->dev); } else { - struct net_device_stats *stats; - drop: - stats = &tp->dev->stats; - stats->tx_dropped++; + tp->dev->stats.tx_dropped++; dev_kfree_skb_any(skb); } } @@ -5889,8 +5431,7 @@ static int msdn_giant_send_check(struct sk_buff *skb) return ret; } -static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp, - struct sk_buff *skb, u32 *opts) +static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts) { u32 mss = skb_shinfo(skb)->gso_size; @@ -5907,8 +5448,6 @@ static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp, else WARN_ON_ONCE(1); } - - return true; } static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, @@ -5998,6 +5537,18 @@ static bool rtl_tx_slots_avail(struct rtl8169_private *tp, return slots_avail > nr_frags; } +/* Versions RTL8102e and from RTL8168c onwards support csum_v2 */ +static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp) +{ + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_10 ... 
RTL_GIGA_MAC_VER_17: + return false; + default: + return true; + } +} + static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -6017,12 +5568,16 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(le32_to_cpu(txd->opts1) & DescOwn)) goto err_stop_0; - opts[1] = cpu_to_le32(rtl8169_tx_vlan_tag(skb)); + opts[1] = rtl8169_tx_vlan_tag(skb); opts[0] = DescOwn; - if (!tp->tso_csum(tp, skb, opts)) { - r8169_csum_workaround(tp, skb); - return NETDEV_TX_OK; + if (rtl_chip_supports_csum_v2(tp)) { + if (!rtl8169_tso_csum_v2(tp, skb, opts)) { + r8169_csum_workaround(tp, skb); + return NETDEV_TX_OK; + } + } else { + rtl8169_tso_csum_v1(skb, opts); } len = skb_headlen(skb); @@ -6229,7 +5784,6 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data, skb = napi_alloc_skb(&tp->napi, pkt_size); if (skb) skb_copy_to_linear_data(skb, data, pkt_size); - dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); return skb; } @@ -6264,14 +5818,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget dev->stats.rx_length_errors++; if (status & RxCRC) dev->stats.rx_crc_errors++; - /* RxFOVF is a reserved bit on later chip versions */ - if (tp->mac_version == RTL_GIGA_MAC_VER_01 && - status & RxFOVF) { - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); - dev->stats.rx_fifo_errors++; - } else if (status & (RxRUNT | RxCRC) && - !(status & RxRWT) && - dev->features & NETIF_F_RXALL) { + if (status & (RxRUNT | RxCRC) && !(status & RxRWT) && + dev->features & NETIF_F_RXALL) { goto process_pkt; } } else { @@ -6451,7 +5999,10 @@ static int r8169_phy_connect(struct rtl8169_private *tp) if (ret) return ret; - if (!tp->supports_gmii) + if (tp->supports_gmii) + phy_remove_link_mode(phydev, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + else phy_set_max_speed(phydev, SPEED_100); phy_support_asym_pause(phydev); @@ -6884,30 +6435,18 @@ static const struct net_device_ops rtl_netdev_ops = { }; -static const struct rtl_cfg_info { - void (*hw_start)(struct rtl8169_private *tp); - u16 irq_mask; - unsigned int has_gmii:1; - const struct rtl_coalesce_info *coalesce_info; -} rtl_cfg_infos [] = { - [RTL_CFG_0] = { - .hw_start = rtl_hw_start_8169, - .irq_mask = SYSErr | LinkChg | RxOverflow | RxFIFOOver, - .has_gmii = 1, - .coalesce_info = rtl_coalesce_info_8169, - }, - [RTL_CFG_1] = { - .hw_start = rtl_hw_start_8168, - .irq_mask = LinkChg | RxOverflow, - .has_gmii = 1, - .coalesce_info = rtl_coalesce_info_8168_8136, - }, - [RTL_CFG_2] = { - .hw_start = rtl_hw_start_8101, - .irq_mask = LinkChg | RxOverflow | RxFIFOOver, - .coalesce_info = rtl_coalesce_info_8168_8136, - } -}; +static void rtl_set_irq_mask(struct rtl8169_private *tp) +{ + tp->irq_mask = RTL_EVENT_NAPI | LinkChg; + + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) + tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver; + else if (tp->mac_version == RTL_GIGA_MAC_VER_11) + /* special workaround needed */ + tp->irq_mask |= RxFIFOOver; + else + tp->irq_mask |= RxOverflow; +} static int rtl_alloc_irq(struct rtl8169_private *tp) { @@ -6928,13 +6467,10 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) static void rtl_read_mac_address(struct rtl8169_private *tp, u8 mac_addr[ETH_ALEN]) { - u32 value; - /* Get MAC address */ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_51: - value = rtl_eri_read(tp, 0xe0); + if (rtl_is_8168evl_up(tp) && tp->mac_version != RTL_GIGA_MAC_VER_34) { + u32 value = rtl_eri_read(tp, 0xe0); + mac_addr[0] = (value >> 0) & 0xff; mac_addr[1] = (value >> 8) & 0xff; mac_addr[2] = (value >> 16) & 0xff; @@ -6943,9 +6479,6 @@ static void rtl_read_mac_address(struct rtl8169_private *tp, value = rtl_eri_read(tp, 0xe4); mac_addr[4] = (value >> 0) & 0xff; mac_addr[5] = (value >> 8) & 0xff; - break; - default: - break; } } @@ -7046,42 +6579,23 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp) data |= (1 << 15); r8168_mac_ocp_write(tp, 0xe8de, data); - if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42)) - return; -} - -static void rtl_hw_init_8168ep(struct rtl8169_private *tp) -{ - rtl8168ep_stop_cmac(tp); - rtl_hw_init_8168g(tp); + rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42); } static void rtl_hw_initialize(struct rtl8169_private *tp) { switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51: + rtl8168ep_stop_cmac(tp); + /* fall through */ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48: rtl_hw_init_8168g(tp); break; - case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51: - rtl_hw_init_8168ep(tp); - break; default: break; } } -/* Versions RTL8102e and from RTL8168c onwards support csum_v2 */ -static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp) -{ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17: - return false; - default: - return true; - } -} - static int rtl_jumbo_max(struct rtl8169_private *tp) { /* Non-GBit versions don't support jumbo frames */ @@ -7090,7 +6604,7 @@ static int rtl_jumbo_max(struct rtl8169_private *tp) switch (tp->mac_version) { /* RTL8169 */ - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: return JUMBO_7K; /* RTL8168b */ case RTL_GIGA_MAC_VER_11: @@ -7136,14 +6650,36 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp) return rc; } +static void rtl_init_mac_address(struct rtl8169_private *tp) +{ + struct net_device *dev = tp->dev; + u8 *mac_addr = dev->dev_addr; + int rc, i; + + rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr); + if (!rc) + goto done; + + rtl_read_mac_address(tp, mac_addr); + if (is_valid_ether_addr(mac_addr)) + goto done; + + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = RTL_R8(tp, MAC0 + i); + if (is_valid_ether_addr(mac_addr)) + goto done; + + eth_hw_addr_random(dev); + dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n"); +done: + rtl_rar_set(tp, mac_addr); +} + static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; - /* align to u16 for is_valid_ether_addr() */ - u8 mac_addr[ETH_ALEN] __aligned(2) = {}; struct rtl8169_private *tp; struct net_device *dev; - int chipset, region, i; + int chipset, region; int jumbo_max, rc; dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); @@ -7156,7 +6692,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->dev = dev; tp->pci_dev = pdev; tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT); - tp->supports_gmii = cfg->has_gmii; + tp->supports_gmii = ent->driver_data == RTL_CFG_NO_GBIT ? 
0 : 1; /* Get the *optional* external "ether_clk" used on some boards */ rc = rtl_get_ether_clk(tp); @@ -7166,7 +6702,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disable ASPM completely as that cause random device stop working * problems as well as full system hangs for some PCIe devices users. */ - pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1); + tp->aspm_manageable = !rc; /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); @@ -7204,23 +6742,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (tp->mac_version == RTL_GIGA_MAC_NONE) return -ENODEV; - if (rtl_tbi_enabled(tp)) { - dev_err(&pdev->dev, "TBI fiber mode not supported\n"); - return -ENODEV; - } - tp->cp_cmd = RTL_R16(tp, CPlusCmd); if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 && - !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { + !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) dev->features |= NETIF_F_HIGHDMA; - } else { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc < 0) { - dev_err(&pdev->dev, "DMA configuration failed\n"); - return rc; - } - } rtl_init_rxcfg(tp); @@ -7232,9 +6758,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - rtl_init_mdio_ops(tp); - rtl_init_jumbo_ops(tp); - chipset = tp->mac_version; rc = rtl_alloc_irq(tp); @@ -7248,16 +6771,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); - /* get MAC address */ - rc = eth_platform_get_mac_address(&pdev->dev, mac_addr); - if (rc) - rtl_read_mac_address(tp, mac_addr); - - if (is_valid_ether_addr(mac_addr)) - rtl_rar_set(tp, mac_addr); - - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = RTL_R8(tp, MAC0 + i); + rtl_init_mac_address(tp); dev->ethtool_ops = &rtl8169_ethtool_ops; @@ -7285,12 +6799,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disallow toggling */ dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; - if (rtl_chip_supports_csum_v2(tp)) { - tp->tso_csum = rtl8169_tso_csum_v2; + if (rtl_chip_supports_csum_v2(tp)) dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; - } else { - tp->tso_csum = rtl8169_tso_csum_v1; - } dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; @@ -7300,9 +6810,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) jumbo_max = rtl_jumbo_max(tp); dev->max_mtu = jumbo_max; - tp->hw_start = cfg->hw_start; - tp->irq_mask = RTL_EVENT_NAPI | cfg->irq_mask; - tp->coalesce_info = cfg->coalesce_info; + rtl_set_irq_mask(tp); tp->fw_name = rtl_chip_infos[chipset].fw_name; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 3e5bc1fc3c46..079f459c73a5 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2210,6 +2210,10 @@ static int rocker_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); return notifier_from_errno(-EINVAL); } + if (fen_info->fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } } memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info)); diff 
--git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index bdfa6a19d620..7072b249c8bd 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -18,6 +18,7 @@ #include <net/neighbour.h> #include <net/switchdev.h> #include <net/ip_fib.h> +#include <net/nexthop.h> #include <net/arp.h> #include "rocker.h" @@ -2282,8 +2283,8 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, __be32 dst, /* XXX support ECMP */ - nh = fi->fib_nh; - nh_on_port = (fi->fib_dev == ofdpa_port->dev); + nh = fib_info_nh(fi, 0); + nh_on_port = (nh->fib_nh_dev == ofdpa_port->dev); has_gw = !!nh->fib_nh_gw4; if (has_gw && nh_on_port) { @@ -2733,11 +2734,13 @@ static int ofdpa_fib4_add(struct rocker *rocker, { struct ofdpa *ofdpa = rocker->wpriv; struct ofdpa_port *ofdpa_port; + struct fib_nh *nh; int err; if (ofdpa->fib_aborted) return 0; - ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + nh = fib_info_nh(fen_info->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) return 0; err = ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst), @@ -2745,7 +2748,7 @@ static int ofdpa_fib4_add(struct rocker *rocker, fen_info->tb_id, 0); if (err) return err; - fen_info->fi->fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + nh->fib_nh_flags |= RTNH_F_OFFLOAD; return 0; } @@ -2754,13 +2757,15 @@ static int ofdpa_fib4_del(struct rocker *rocker, { struct ofdpa *ofdpa = rocker->wpriv; struct ofdpa_port *ofdpa_port; + struct fib_nh *nh; if (ofdpa->fib_aborted) return 0; - ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + nh = fib_info_nh(fen_info->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) return 0; - fen_info->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst), fen_info->dst_len, fen_info->fi, fen_info->tb_id, OFDPA_OP_FLAG_REMOVE); @@ -2780,14 +2785,16 @@ static void ofdpa_fib4_abort(struct rocker *rocker) spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) { + struct fib_nh *nh; + if (flow_entry->key.tbl_id != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) continue; - ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev, - rocker); + nh = fib_info_nh(flow_entry->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) continue; - flow_entry->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE, flow_entry); } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 53b726bfe945..ab58b837df47 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3614,11 +3614,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, netif_warn(efx, probe, efx->net_dev, "failed to create MTDs (%d)\n", rc); - rc = pci_enable_pcie_error_reporting(pci_dev); - if (rc && rc != -EINVAL) - netif_notice(efx, probe, efx->net_dev, - "PCIE error reporting unavailable (%d).\n", - rc); + (void)pci_enable_pcie_error_reporting(pci_dev); if (efx->type->udp_tnl_push_ports) efx->type->udp_tnl_push_ports(efx); diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 9b036c857b1d..aba6eea72f15 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -360,7 
+360,7 @@ static int sis635_get_mac_addr(struct pci_dev *pci_dev, * SiS962 or SiS963 model, use EEPROM to store MAC address. And EEPROM * is shared by * LAN and 1394. When access EEPROM, send EEREQ signal to hardware first - * and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be access + * and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be accessed * by LAN, otherwise is not. After MAC address is read from EEPROM, send * EEDONE signal to refuse EEPROM access by LAN. * The EEPROM map of SiS962 or SiS963 is different to SiS900. @@ -882,7 +882,7 @@ static void mdio_reset(struct sis900_private *sp) * mdio_read - read MII PHY register * @net_dev: the net device to read * @phy_id: the phy address to read - * @location: the phy regiester id to read + * @location: the phy register id to read * * Read MII registers through MDIO and MDC * using MDIO management frame structure and protocol(defined by ISO/IEC). @@ -926,7 +926,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location) * mdio_write - write MII PHY register * @net_dev: the net device to write * @phy_id: the phy address to write - * @location: the phy regiester id to write + * @location: the phy register id to write * @value: the register value to write with * * Write MII registers with @value through MDIO and MDC @@ -1057,7 +1057,7 @@ sis900_open(struct net_device *net_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); @@ -1101,7 +1101,7 @@ sis900_init_rxfilter (struct net_device * net_dev) sw32(rfdr, w); if (netif_msg_hw(sis_priv)) { - printk(KERN_DEBUG "%s: Receive Filter Addrss[%d]=%x\n", + printk(KERN_DEBUG "%s: Receive Filter Address[%d]=%x\n", net_dev->name, i, sr32(rfdr)); } } @@ -1148,7 +1148,7 @@ sis900_init_tx_ring(struct net_device *net_dev) * @net_dev: the net device to initialize for * * Initialize the Rx descriptor ring, - * and pre-allocate recevie buffers (socket buffer) + * and pre-allocate receive buffers (socket buffer) */ static void @@ -1578,7 +1578,7 @@ static void sis900_tx_timeout(struct net_device *net_dev) sw32(txdp, sis_priv->tx_ring_dma); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC); } /** @@ -1674,8 +1674,8 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) do { status = sr32(isr); - if ((status & (HIBERR|TxURN|TxERR|TxIDLE|TxDESC|RxORN|RxERR|RxOK)) == 0) - /* nothing intresting happened */ + if ((status & (HIBERR|TxURN|TxERR|TxDESC|RxORN|RxERR|RxOK)) == 0) + /* nothing interesting happened */ break; handled = 1; @@ -1684,7 +1684,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) /* Rx interrupt */ sis900_rx(net_dev); - if (status & (TxURN | TxERR | TxIDLE | TxDESC)) + if (status & (TxURN | TxERR | TxDESC)) /* Tx interrupt */ sis900_finish_xmit(net_dev); @@ -1897,7 +1897,7 @@ static void sis900_finish_xmit (struct net_device *net_dev) if (tx_status & OWN) { /* The packet is not transmitted yet (owned by hardware) ! 
* Note: this is an almost impossible condition - * in case of TxDESC ('descriptor interrupt') */ + * on TxDESC interrupt ('descriptor interrupt') */ break; } @@ -2473,7 +2473,7 @@ static int sis900_resume(struct pci_dev *pci_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig index 25f18be27423..95e99baf3f45 100644 --- a/drivers/net/ethernet/socionext/Kconfig +++ b/drivers/net/ethernet/socionext/Kconfig @@ -26,6 +26,7 @@ config SNI_NETSEC tristate "Socionext NETSEC ethernet support" depends on (ARCH_SYNQUACER || COMPILE_TEST) && OF select PHYLIB + select PAGE_POOL select MII ---help--- Enable to add support for the SocioNext NetSec Gigabit Ethernet diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index cba5881b2746..1502fe8b0456 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -9,8 +9,12 @@ #include <linux/etherdevice.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/netlink.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <net/tcp.h> +#include <net/page_pool.h> #include <net/ip6_checksum.h> #define NETSEC_REG_SOFT_RST 0x104 @@ -235,22 +239,41 @@ #define DESC_NUM 256 #define NETSEC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#define NETSEC_RX_BUF_SZ 1536 +#define NETSEC_RXBUF_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ + NET_IP_ALIGN) +#define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define DESC_SZ sizeof(struct netsec_de) #define NETSEC_F_NETSEC_VER_MAJOR_NUM(x) ((x) & 0xffff0000) +#define NETSEC_XDP_PASS 0 +#define NETSEC_XDP_CONSUMED BIT(0) +#define NETSEC_XDP_TX BIT(1) +#define NETSEC_XDP_REDIR BIT(2) +#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR) + enum ring_id { NETSEC_RING_TX = 0, NETSEC_RING_RX }; +enum buf_type { + TYPE_NETSEC_SKB = 0, + TYPE_NETSEC_XDP_TX, + TYPE_NETSEC_XDP_NDO, +}; + struct netsec_desc { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; dma_addr_t dma_addr; void *addr; u16 len; + u8 buf_type; }; struct netsec_desc_ring { @@ -258,11 +281,17 @@ struct netsec_desc_ring { struct netsec_desc *desc; void *vaddr; u16 head, tail; + u16 xdp_xmit; /* netsec_xdp_xmit packets */ + bool is_xdp; + struct page_pool *page_pool; + struct xdp_rxq_info xdp_rxq; + spinlock_t lock; /* XDP tx queue locking */ }; struct netsec_priv { struct netsec_desc_ring desc_ring[NETSEC_RING_MAX]; struct ethtool_coalesce et_coalesce; + struct bpf_prog *xdp_prog; spinlock_t reglock; /* protect reg access */ struct napi_struct napi; phy_interface_t phy_interface; @@ -600,12 +629,14 @@ static void netsec_set_rx_de(struct netsec_priv *priv, static bool netsec_clean_tx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; - unsigned int pkts, bytes; struct netsec_de *entry; int tail = dring->tail; + unsigned int bytes; int cnt = 0; - pkts = 0; + if (dring->is_xdp) + spin_lock(&dring->lock); + bytes = 0; entry = dring->vaddr + DESC_SZ * tail; @@ -618,13 +649,23 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) eop = 
(entry->attr >> NETSEC_TX_LAST) & 1; dma_rmb(); - dma_unmap_single(priv->dev, desc->dma_addr, desc->len, - DMA_TO_DEVICE); - if (eop) { - pkts++; + /* if buf_type is either TYPE_NETSEC_SKB or + * TYPE_NETSEC_XDP_NDO we mapped it + */ + if (desc->buf_type != TYPE_NETSEC_XDP_TX) + dma_unmap_single(priv->dev, desc->dma_addr, desc->len, + DMA_TO_DEVICE); + + if (!eop) + goto next; + + if (desc->buf_type == TYPE_NETSEC_SKB) { bytes += desc->skb->len; dev_kfree_skb(desc->skb); + } else { + xdp_return_frame(desc->xdpf); } +next: /* clean up so netsec_uninit_pkt_dring() won't free the skb * again */ @@ -641,6 +682,8 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) entry = dring->vaddr + DESC_SZ * tail; cnt++; } + if (dring->is_xdp) + spin_unlock(&dring->lock); if (!cnt) return false; @@ -673,33 +716,31 @@ static void netsec_process_tx(struct netsec_priv *priv) } static void *netsec_alloc_rx_data(struct netsec_priv *priv, - dma_addr_t *dma_handle, u16 *desc_len, - bool napi) + dma_addr_t *dma_handle, u16 *desc_len) + { - size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - size_t payload_len = NETSEC_RX_BUF_SZ; - dma_addr_t mapping; - void *buf; - total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD); + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; + enum dma_data_direction dma_dir; + struct page *page; - buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len); - if (!buf) + page = page_pool_dev_alloc_pages(dring->page_pool); + if (!page) return NULL; - mapping = dma_map_single(priv->dev, buf + NETSEC_SKB_PAD, payload_len, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(priv->dev, mapping))) - goto err_out; - - *dma_handle = mapping; - *desc_len = payload_len; - - return buf; + /* We allocate the same buffer length for XDP and non-XDP cases. 
+ * page_pool API will map the whole page, skip what's needed for + * network payloads and/or XDP + */ + *dma_handle = page_pool_get_dma_addr(page) + NETSEC_RXBUF_HEADROOM; + /* Make sure the incoming payload fits in the page for XDP and non-XDP + * cases and reserve enough space for headroom + skb_shared_info + */ + *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA; + dma_dir = page_pool_get_dma_dir(dring->page_pool); + dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir); -err_out: - skb_free_frag(buf); - return NULL; + return page_address(page); } static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num) @@ -716,22 +757,201 @@ static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num) } } +static void netsec_xdp_ring_tx_db(struct netsec_priv *priv, u16 pkts) +{ + if (likely(pkts)) + netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, pkts); +} + +static void netsec_finalize_xdp_rx(struct netsec_priv *priv, u32 xdp_res, + u16 pkts) +{ + if (xdp_res & NETSEC_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_res & NETSEC_XDP_TX) + netsec_xdp_ring_tx_db(priv, pkts); +} + +static void netsec_set_tx_de(struct netsec_priv *priv, + struct netsec_desc_ring *dring, + const struct netsec_tx_pkt_ctrl *tx_ctrl, + const struct netsec_desc *desc, void *buf) +{ + int idx = dring->head; + struct netsec_de *de; + u32 attr; + + de = dring->vaddr + (DESC_SZ * idx); + + attr = (1 << NETSEC_TX_SHIFT_OWN_FIELD) | + (1 << NETSEC_TX_SHIFT_PT_FIELD) | + (NETSEC_RING_GMAC << NETSEC_TX_SHIFT_TDRID_FIELD) | + (1 << NETSEC_TX_SHIFT_FS_FIELD) | + (1 << NETSEC_TX_LAST) | + (tx_ctrl->cksum_offload_flag << NETSEC_TX_SHIFT_CO) | + (tx_ctrl->tcp_seg_offload_flag << NETSEC_TX_SHIFT_SO) | + (1 << NETSEC_TX_SHIFT_TRS_FIELD); + if (idx == DESC_NUM - 1) + attr |= (1 << NETSEC_TX_SHIFT_LD_FIELD); + + de->data_buf_addr_up = upper_32_bits(desc->dma_addr); + de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); + de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len; + de->attr = attr; + /* under spin_lock if using XDP */ + if (!dring->is_xdp) + dma_wmb(); + + dring->desc[idx] = *desc; + if (desc->buf_type == TYPE_NETSEC_SKB) + dring->desc[idx].skb = buf; + else if (desc->buf_type == TYPE_NETSEC_XDP_TX || + desc->buf_type == TYPE_NETSEC_XDP_NDO) + dring->desc[idx].xdpf = buf; + + /* move head ahead */ + dring->head = (dring->head + 1) % DESC_NUM; +} + +/* The current driver only supports 1 Txq, this should run under spin_lock() */ +static u32 netsec_xdp_queue_one(struct netsec_priv *priv, + struct xdp_frame *xdpf, bool is_ndo) + +{ + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX]; + struct page *page = virt_to_page(xdpf->data); + struct netsec_tx_pkt_ctrl tx_ctrl = {}; + struct netsec_desc tx_desc; + dma_addr_t dma_handle; + u16 filled; + + if (tx_ring->head >= tx_ring->tail) + filled = tx_ring->head - tx_ring->tail; + else + filled = tx_ring->head + DESC_NUM - tx_ring->tail; + + if (DESC_NUM - filled <= 1) + return NETSEC_XDP_CONSUMED; + + if (is_ndo) { + /* this is for ndo_xdp_xmit, the buffer needs mapping before + * sending + */ + dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len, + DMA_TO_DEVICE); + if (dma_mapping_error(priv->dev, dma_handle)) + return NETSEC_XDP_CONSUMED; + tx_desc.buf_type = TYPE_NETSEC_XDP_NDO; + } else { + /* This is the device Rx buffer from page_pool. 
No need to remap + * just sync and send it + */ + struct netsec_desc_ring *rx_ring = + &priv->desc_ring[NETSEC_RING_RX]; + enum dma_data_direction dma_dir = + page_pool_get_dma_dir(rx_ring->page_pool); + + dma_handle = page_pool_get_dma_addr(page) + + NETSEC_RXBUF_HEADROOM; + dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len, + dma_dir); + tx_desc.buf_type = TYPE_NETSEC_XDP_TX; + } + + tx_desc.dma_addr = dma_handle; + tx_desc.addr = xdpf->data; + tx_desc.len = xdpf->len; + + netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf); + + return NETSEC_XDP_TX; +} + +static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp) +{ + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX]; + struct xdp_frame *xdpf = convert_to_xdp_frame(xdp); + u32 ret; + + if (unlikely(!xdpf)) + return NETSEC_XDP_CONSUMED; + + spin_lock(&tx_ring->lock); + ret = netsec_xdp_queue_one(priv, xdpf, false); + spin_unlock(&tx_ring->lock); + + return ret; +} + +static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + u32 ret = NETSEC_XDP_PASS; + int err; + u32 act; + + act = bpf_prog_run_xdp(prog, xdp); + + switch (act) { + case XDP_PASS: + ret = NETSEC_XDP_PASS; + break; + case XDP_TX: + ret = netsec_xdp_xmit_back(priv, xdp); + if (ret != NETSEC_XDP_TX) + xdp_return_buff(xdp); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(priv->ndev, xdp, prog); + if (!err) { + ret = NETSEC_XDP_REDIR; + } else { + ret = NETSEC_XDP_CONSUMED; + xdp_return_buff(xdp); + } + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(priv->ndev, prog, act); + /* fall through -- handle aborts by dropping packet */ + case XDP_DROP: + ret = NETSEC_XDP_CONSUMED; + xdp_return_buff(xdp); + break; + } + + return ret; +} + static int netsec_process_rx(struct netsec_priv *priv, int budget) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; struct net_device *ndev = priv->ndev; struct netsec_rx_pkt_info rx_info; - struct sk_buff *skb; + enum dma_data_direction dma_dir; + struct bpf_prog *xdp_prog; + struct sk_buff *skb = NULL; + u16 xdp_xmit = 0; + u32 xdp_act = 0; int done = 0; + rcu_read_lock(); + xdp_prog = READ_ONCE(priv->xdp_prog); + dma_dir = page_pool_get_dma_dir(dring->page_pool); + while (done < budget) { u16 idx = dring->tail; struct netsec_de *de = dring->vaddr + (DESC_SZ * idx); struct netsec_desc *desc = &dring->desc[idx]; + struct page *page = virt_to_page(desc->addr); + u32 xdp_result = XDP_PASS; u16 pkt_len, desc_len; dma_addr_t dma_handle; + struct xdp_buff xdp; void *buf_addr; - u32 truesize; if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) { /* reading the register clears the irq */ @@ -766,53 +986,71 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) /* allocate a fresh buffer and map it to the hardware. 
* This will eventually replace the old buffer in the hardware */ - buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len, - true); + buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len); + if (unlikely(!buf_addr)) break; dma_sync_single_for_cpu(priv->dev, desc->dma_addr, pkt_len, - DMA_FROM_DEVICE); + dma_dir); prefetch(desc->addr); - truesize = SKB_DATA_ALIGN(desc->len + NETSEC_SKB_PAD) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - skb = build_skb(desc->addr, truesize); + xdp.data_hard_start = desc->addr; + xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM; + xdp_set_data_meta_invalid(&xdp); + xdp.data_end = xdp.data + pkt_len; + xdp.rxq = &dring->xdp_rxq; + + if (xdp_prog) { + xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp); + if (xdp_result != NETSEC_XDP_PASS) { + xdp_act |= xdp_result; + if (xdp_result == NETSEC_XDP_TX) + xdp_xmit++; + goto next; + } + } + skb = build_skb(desc->addr, desc->len + NETSEC_RX_BUF_NON_DATA); + if (unlikely(!skb)) { - /* free the newly allocated buffer, we are not going to - * use it + /* If skb fails recycle_direct will either unmap and + * free the page or refill the cache depending on the + * cache state. Since we paid the allocation cost if + * building an skb fails try to put the page into cache */ - dma_unmap_single(priv->dev, dma_handle, desc_len, - DMA_FROM_DEVICE); - skb_free_frag(buf_addr); + page_pool_recycle_direct(dring->page_pool, page); netif_err(priv, drv, priv->ndev, "rx failed to build skb\n"); break; } - dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len, - DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - - /* Update the descriptor with the new buffer we allocated */ - desc->len = desc_len; - desc->dma_addr = dma_handle; - desc->addr = buf_addr; + page_pool_release_page(dring->page_pool, page); - skb_reserve(skb, NETSEC_SKB_PAD); - skb_put(skb, pkt_len); + skb_reserve(skb, xdp.data - xdp.data_hard_start); + skb_put(skb, xdp.data_end - xdp.data); skb->protocol = eth_type_trans(skb, priv->ndev); if (priv->rx_cksum_offload_flag && rx_info.rx_cksum_result == NETSEC_RX_CKSUM_OK) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) { +next: + if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) || + xdp_result & NETSEC_XDP_RX_OK) { ndev->stats.rx_packets++; - ndev->stats.rx_bytes += pkt_len; + ndev->stats.rx_bytes += xdp.data_end - xdp.data; } + /* Update the descriptor with fresh buffers */ + desc->len = desc_len; + desc->dma_addr = dma_handle; + desc->addr = buf_addr; + netsec_rx_fill(priv, idx, 1); dring->tail = (dring->tail + 1) % DESC_NUM; } + netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit); + + rcu_read_unlock(); return done; } @@ -820,19 +1058,12 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) static int netsec_napi_poll(struct napi_struct *napi, int budget) { struct netsec_priv *priv; - int rx, done, todo; + int done; priv = container_of(napi, struct netsec_priv, napi); netsec_process_tx(priv); - - todo = budget; - do { - rx = netsec_process_rx(priv, todo); - todo -= rx; - } while (rx); - - done = budget - todo; + done = netsec_process_rx(priv, budget); if (done < budget && napi_complete_done(napi, done)) { unsigned long flags; @@ -846,41 +1077,6 @@ static int netsec_napi_poll(struct napi_struct *napi, int budget) return done; } -static void netsec_set_tx_de(struct netsec_priv *priv, - struct netsec_desc_ring *dring, - const struct netsec_tx_pkt_ctrl *tx_ctrl, - const struct netsec_desc *desc, - struct sk_buff *skb) -{ - int idx = dring->head; 
- struct netsec_de *de; - u32 attr; - - de = dring->vaddr + (DESC_SZ * idx); - - attr = (1 << NETSEC_TX_SHIFT_OWN_FIELD) | - (1 << NETSEC_TX_SHIFT_PT_FIELD) | - (NETSEC_RING_GMAC << NETSEC_TX_SHIFT_TDRID_FIELD) | - (1 << NETSEC_TX_SHIFT_FS_FIELD) | - (1 << NETSEC_TX_LAST) | - (tx_ctrl->cksum_offload_flag << NETSEC_TX_SHIFT_CO) | - (tx_ctrl->tcp_seg_offload_flag << NETSEC_TX_SHIFT_SO) | - (1 << NETSEC_TX_SHIFT_TRS_FIELD); - if (idx == DESC_NUM - 1) - attr |= (1 << NETSEC_TX_SHIFT_LD_FIELD); - - de->data_buf_addr_up = upper_32_bits(desc->dma_addr); - de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); - de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len; - de->attr = attr; - dma_wmb(); - - dring->desc[idx] = *desc; - dring->desc[idx].skb = skb; - - /* move head ahead */ - dring->head = (dring->head + 1) % DESC_NUM; -} static int netsec_desc_used(struct netsec_desc_ring *dring) { @@ -927,8 +1123,12 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, u16 tso_seg_len = 0; int filled; + if (dring->is_xdp) + spin_lock_bh(&dring->lock); filled = netsec_desc_used(dring); if (netsec_check_stop_tx(priv, filled)) { + if (dring->is_xdp) + spin_unlock_bh(&dring->lock); net_warn_ratelimited("%s %s Tx queue full\n", dev_name(priv->dev), ndev->name); return NETDEV_TX_BUSY; @@ -961,6 +1161,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, tx_desc.dma_addr = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) { + if (dring->is_xdp) + spin_unlock_bh(&dring->lock); netif_err(priv, drv, priv->ndev, "%s: DMA mapping failed\n", __func__); ndev->stats.tx_dropped++; @@ -969,11 +1171,14 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, } tx_desc.addr = skb->data; tx_desc.len = skb_headlen(skb); + tx_desc.buf_type = TYPE_NETSEC_SKB; skb_tx_timestamp(skb); netdev_sent_queue(priv->ndev, skb->len); netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb); + if (dring->is_xdp) + spin_unlock_bh(&dring->lock); netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */ return NETDEV_TX_OK; @@ -987,19 +1192,27 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) if (!dring->vaddr || !dring->desc) return; - for (idx = 0; idx < DESC_NUM; idx++) { desc = &dring->desc[idx]; if (!desc->addr) continue; - dma_unmap_single(priv->dev, desc->dma_addr, desc->len, - id == NETSEC_RING_RX ? 
DMA_FROM_DEVICE : - DMA_TO_DEVICE); - if (id == NETSEC_RING_RX) - skb_free_frag(desc->addr); - else if (id == NETSEC_RING_TX) + if (id == NETSEC_RING_RX) { + struct page *page = virt_to_page(desc->addr); + + page_pool_put_page(dring->page_pool, page, false); + } else if (id == NETSEC_RING_TX) { + dma_unmap_single(priv->dev, desc->dma_addr, desc->len, + DMA_TO_DEVICE); dev_kfree_skb(desc->skb); + } + } + + /* Rx is currently using page_pool */ + if (id == NETSEC_RING_RX) { + if (xdp_rxq_info_is_reg(&dring->xdp_rxq)) + xdp_rxq_info_unreg(&dring->xdp_rxq); + page_pool_destroy(dring->page_pool); } memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM); @@ -1029,7 +1242,6 @@ static void netsec_free_dring(struct netsec_priv *priv, int id) static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) { struct netsec_desc_ring *dring = &priv->desc_ring[id]; - int i; dring->vaddr = dma_alloc_coherent(priv->dev, DESC_SZ * DESC_NUM, &dring->desc_dma, GFP_KERNEL); @@ -1040,19 +1252,6 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) if (!dring->desc) goto err; - if (id == NETSEC_RING_TX) { - for (i = 0; i < DESC_NUM; i++) { - struct netsec_de *de; - - de = dring->vaddr + (DESC_SZ * i); - /* de->attr is not going to be accessed by the NIC - * until netsec_set_tx_de() is called. - * No need for a dma_wmb() here - */ - de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; - } - } - return 0; err: netsec_free_dring(priv, id); @@ -1060,10 +1259,60 @@ err: return -ENOMEM; } +static void netsec_setup_tx_dring(struct netsec_priv *priv) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; + struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); + int i; + + for (i = 0; i < DESC_NUM; i++) { + struct netsec_de *de; + + de = dring->vaddr + (DESC_SZ * i); + /* de->attr is not going to be accessed by the NIC + * until netsec_set_tx_de() is called. + * No need for a dma_wmb() here + */ + de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; + } + + if (xdp_prog) + dring->is_xdp = true; + else + dring->is_xdp = false; + +} + static int netsec_setup_rx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; - int i; + struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); + struct page_pool_params pp_params = { 0 }; + int i, err; + + pp_params.order = 0; + /* internal DMA mapping in page_pool */ + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = DESC_NUM; + pp_params.nid = cpu_to_node(0); + pp_params.dev = priv->dev; + pp_params.dma_dir = xdp_prog ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + + dring->page_pool = page_pool_create(&pp_params); + if (IS_ERR(dring->page_pool)) { + err = PTR_ERR(dring->page_pool); + dring->page_pool = NULL; + goto err_out; + } + + err = xdp_rxq_info_reg(&dring->xdp_rxq, priv->ndev, 0); + if (err) + goto err_out; + + err = xdp_rxq_info_reg_mem_model(&dring->xdp_rxq, MEM_TYPE_PAGE_POOL, + dring->page_pool); + if (err) + goto err_out; for (i = 0; i < DESC_NUM; i++) { struct netsec_desc *desc = &dring->desc[i]; @@ -1071,10 +1320,10 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) void *buf; u16 len; - buf = netsec_alloc_rx_data(priv, &dma_handle, &len, - false); + buf = netsec_alloc_rx_data(priv, &dma_handle, &len); + if (!buf) { - netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); + err = -ENOMEM; goto err_out; } desc->dma_addr = dma_handle; @@ -1087,7 +1336,8 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) return 0; err_out: - return -ENOMEM; + netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); + return err; } static int netsec_netdev_load_ucode_region(struct netsec_priv *priv, u32 reg, @@ -1361,6 +1611,7 @@ static int netsec_netdev_open(struct net_device *ndev) pm_runtime_get_sync(priv->dev); + netsec_setup_tx_dring(priv); ret = netsec_setup_rx_dring(priv); if (ret) { netif_err(priv, probe, priv->ndev, @@ -1466,6 +1717,9 @@ static int netsec_netdev_init(struct net_device *ndev) if (ret) goto err2; + spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock); + spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock); + return 0; err2: netsec_free_dring(priv, NETSEC_RING_RX); @@ -1498,6 +1752,81 @@ static int netsec_netdev_ioctl(struct net_device *ndev, struct ifreq *ifr, return phy_mii_ioctl(ndev->phydev, ifr, cmd); } +static int netsec_xdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct netsec_priv *priv = netdev_priv(ndev); + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX]; + int drops = 0; + int i; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + spin_lock(&tx_ring->lock); + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = netsec_xdp_queue_one(priv, xdpf, true); + if (err != NETSEC_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } else { + tx_ring->xdp_xmit++; + } + } + spin_unlock(&tx_ring->lock); + + if (unlikely(flags & XDP_XMIT_FLUSH)) { + netsec_xdp_ring_tx_db(priv, tx_ring->xdp_xmit); + tx_ring->xdp_xmit = 0; + } + + return n - drops; +} + +static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = priv->ndev; + struct bpf_prog *old_prog; + + /* For now just support only the usual MTU sized frames */ + if (prog && dev->mtu > 1500) { + NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP"); + return -EOPNOTSUPP; + } + + if (netif_running(dev)) + netsec_netdev_stop(dev); + + /* Detach old prog, if any */ + old_prog = xchg(&priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (netif_running(dev)) + netsec_netdev_open(dev); + + return 0; +} + +static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp) +{ + struct netsec_priv *priv = netdev_priv(ndev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return netsec_xdp_setup(priv, xdp->prog, xdp->extack); + case XDP_QUERY_PROG: + xdp->prog_id = priv->xdp_prog ? 
priv->xdp_prog->aux->id : 0; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops netsec_netdev_ops = { .ndo_init = netsec_netdev_init, .ndo_uninit = netsec_netdev_uninit, @@ -1508,6 +1837,8 @@ static const struct net_device_ops netsec_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = netsec_netdev_ioctl, + .ndo_xdp_xmit = netsec_xdp_xmit, + .ndo_bpf = netsec_xdp, }; static int netsec_of_probe(struct platform_device *pdev, diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 06545d7399fc..2325b40dff6e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config STMMAC_ETH - tristate "STMicroelectronics 10/100/1000/EQOS Ethernet driver" + tristate "STMicroelectronics Multi-Gigabit Ethernet driver" depends on HAS_IOMEM && HAS_DMA select MII - select PHYLIB + select PAGE_POOL + select PHYLINK select CRC32 imply PTP_1588_CLOCK select RESET_CONTROLLER @@ -13,6 +14,16 @@ config STMMAC_ETH if STMMAC_ETH +config STMMAC_SELFTESTS + bool "Support for STMMAC Selftests" + depends on INET + depends on STMMAC_ETH + default n + ---help--- + This adds support for STMMAC Selftests using ethtool. Enable this + feature if you are facing problems with your HW and submit the test + results to the netdev Mailing List. + config STMMAC_PLATFORM tristate "STMMAC Platform bus support" depends on STMMAC_ETH @@ -31,7 +42,6 @@ if STMMAC_PLATFORM config DWMAC_DWC_QOS_ETH tristate "Support for snps,dwc-qos-ethernet.txt DT binding." - select PHYLIB select CRC32 select MII depends on OF && HAS_DMA diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index c529c21e9bdd..c59926d96bcc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -8,6 +8,8 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \ $(stmmac-y) +stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o + # Ordering matters. Generic driver must be last. 
obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o obj-$(CONFIG_DWMAC_ANARION) += dwmac-anarion.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index ceb0d23f5041..ed872eed1cab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -246,12 +246,13 @@ struct stmmac_safety_stats { /* Max/Min RI Watchdog Timer count value */ #define MAX_DMA_RIWT 0xff -#define MIN_DMA_RIWT 0x20 +#define MIN_DMA_RIWT 0x10 /* Tx coalesce parameters */ #define STMMAC_COAL_TX_TIMER 1000 #define STMMAC_MAX_COAL_TX_TICK 100000 #define STMMAC_TX_MAX_FRAMES 256 -#define STMMAC_TX_FRAMES 25 +#define STMMAC_TX_FRAMES 1 +#define STMMAC_RX_FRAMES 25 /* Packets types */ enum packets_types { @@ -325,6 +326,7 @@ struct dma_features { /* 802.3az - Energy-Efficient Ethernet (EEE) */ unsigned int eee; unsigned int av; + unsigned int hash_tb_sz; unsigned int tsoen; /* TX and RX csum */ unsigned int tx_coe; @@ -351,6 +353,7 @@ struct dma_features { unsigned int frpsel; unsigned int frpbs; unsigned int frpes; + unsigned int addr64; }; /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ @@ -392,8 +395,12 @@ struct mac_link { u32 speed100; u32 speed1000; u32 speed2500; - u32 speed10000; u32 duplex; + struct { + u32 speed2500; + u32 speed5000; + u32 speed10000; + } xgmii; }; struct mii_regs { @@ -414,12 +421,13 @@ struct mac_device_info { const struct stmmac_mode_ops *mode; const struct stmmac_hwtimestamp *ptp; const struct stmmac_tc_ops *tc; + const struct stmmac_mmc_ops *mmc; struct mii_regs mii; /* MII register Addresses */ struct mac_link link; void __iomem *pcsr; /* vpointer to device CSRs */ - int multicast_filter_bins; - int unicast_filter_entries; - int mcast_bits_log2; + unsigned int multicast_filter_bins; + unsigned int unicast_filter_entries; + unsigned int mcast_bits_log2; unsigned int rx_csum; unsigned int pcs; unsigned int pmt; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 126b66bb73a6..79f2ee37afed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -9,6 +9,7 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_net.h> +#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/stmmac.h> @@ -298,6 +299,9 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) return ret; } + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + return 0; } @@ -307,6 +311,9 @@ static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) const struct mediatek_dwmac_variant *variant = plat->variant; clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); } static int mediatek_dwmac_probe(struct platform_device *pdev) @@ -349,6 +356,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) plat_dat->has_gmac4 = 1; plat_dat->has_gmac = 0; plat_dat->pmt = 0; + plat_dat->riwt_off = 1; plat_dat->maxmtu = ETH_DATA_LEN; plat_dat->bsp_priv = priv_plat; plat_dat->init = mediatek_dwmac_init; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 8bdbddeec117..c141fe783e87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -27,9 +27,12 @@ #define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2 #define 
SYSMGR_EMACGRP_CTRL_PHYSEL_MASK 0x00000003 #define SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000010 +#define SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000100 #define SYSMGR_FPGAGRP_MODULE_REG 0x00000028 #define SYSMGR_FPGAGRP_MODULE_EMAC 0x00000004 +#define SYSMGR_FPGAINTF_EMAC_REG 0x00000070 +#define SYSMGR_FPGAINTF_EMAC_BIT 0x1 #define EMAC_SPLITTER_CTRL_REG 0x0 #define EMAC_SPLITTER_CTRL_SPEED_MASK 0x3 @@ -37,6 +40,11 @@ #define EMAC_SPLITTER_CTRL_SPEED_100 0x3 #define EMAC_SPLITTER_CTRL_SPEED_1000 0x0 +struct socfpga_dwmac; +struct socfpga_dwmac_ops { + int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv); +}; + struct socfpga_dwmac { int interface; u32 reg_offset; @@ -48,6 +56,7 @@ struct socfpga_dwmac { void __iomem *splitter_base; bool f2h_ptp_ref_clk; struct tse_pcs pcs; + const struct socfpga_dwmac_ops *ops; }; static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) @@ -222,25 +231,36 @@ err_node_put: return ret; } -static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) +static int socfpga_set_phy_mode_common(int phymode, u32 *val) { - struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; - int phymode = dwmac->interface; - u32 reg_offset = dwmac->reg_offset; - u32 reg_shift = dwmac->reg_shift; - u32 ctrl, val, module; - switch (phymode) { case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: - val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII; + *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII; break; case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_GMII: case PHY_INTERFACE_MODE_SGMII: - val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; + *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; + break; + case PHY_INTERFACE_MODE_RMII: + *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII; break; default: + return -EINVAL; + } + return 0; +} + +static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) +{ + struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; + int phymode = dwmac->interface; + u32 reg_offset = dwmac->reg_offset; + u32 reg_shift = dwmac->reg_shift; + u32 ctrl, val, module; + + if (socfpga_set_phy_mode_common(phymode, &val)) { dev_err(dwmac->dev, "bad phy mode %d\n", phymode); return -EINVAL; } @@ -291,6 +311,62 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) return 0; } +static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac) +{ + struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; + int phymode = dwmac->interface; + u32 reg_offset = dwmac->reg_offset; + u32 reg_shift = dwmac->reg_shift; + u32 ctrl, val, module; + + if (socfpga_set_phy_mode_common(phymode, &val)) + return -EINVAL; + + /* Overwrite val to GMII if splitter core is enabled. The phymode here + * is the actual phy mode on phy hardware, but phy interface from + * EMAC core is GMII. 
+ */ + if (dwmac->splitter_base) + val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; + + /* Assert reset to the enet controller before changing the phy mode */ + reset_control_assert(dwmac->stmmac_ocp_rst); + reset_control_assert(dwmac->stmmac_rst); + + regmap_read(sys_mgr_base_addr, reg_offset, &ctrl); + ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK); + ctrl |= val; + + if (dwmac->f2h_ptp_ref_clk || + phymode == PHY_INTERFACE_MODE_MII || + phymode == PHY_INTERFACE_MODE_GMII || + phymode == PHY_INTERFACE_MODE_SGMII) { + ctrl |= SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK; + regmap_read(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG, + &module); + module |= (SYSMGR_FPGAINTF_EMAC_BIT << reg_shift); + regmap_write(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG, + module); + } else { + ctrl &= ~SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK; + } + + regmap_write(sys_mgr_base_addr, reg_offset, ctrl); + + /* Deassert reset for the phy configuration to be sampled by + * the enet controller, and operation to start in requested mode + */ + reset_control_deassert(dwmac->stmmac_ocp_rst); + reset_control_deassert(dwmac->stmmac_rst); + if (phymode == PHY_INTERFACE_MODE_SGMII) { + if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { + dev_err(dwmac->dev, "Unable to initialize TSE PCS"); + return -EINVAL; + } + } + return 0; +} + static int socfpga_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -300,6 +376,13 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) struct socfpga_dwmac *dwmac; struct net_device *ndev; struct stmmac_priv *stpriv; + const struct socfpga_dwmac_ops *ops; + + ops = device_get_match_data(&pdev->dev); + if (!ops) { + dev_err(&pdev->dev, "no of match data provided\n"); + return -EINVAL; + } ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) @@ -330,6 +413,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } + dwmac->ops = ops; plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed; @@ -346,7 +430,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) */ dwmac->stmmac_rst = stpriv->plat->stmmac_rst; - ret = socfpga_dwmac_set_phy_mode(dwmac); + ret = ops->set_phy_mode(dwmac); if (ret) goto err_dvr_remove; @@ -365,8 +449,9 @@ static int socfpga_dwmac_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); + struct socfpga_dwmac *dwmac_priv = get_stmmac_bsp_priv(dev); - socfpga_dwmac_set_phy_mode(priv->plat->bsp_priv); + dwmac_priv->ops->set_phy_mode(priv->plat->bsp_priv); /* Before the enet controller is suspended, the phy is suspended. * This causes the phy clock to be gated. 
The enet controller is @@ -393,8 +478,17 @@ static int socfpga_dwmac_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend, socfpga_dwmac_resume); +static const struct socfpga_dwmac_ops socfpga_gen5_ops = { + .set_phy_mode = socfpga_gen5_set_phy_mode, +}; + +static const struct socfpga_dwmac_ops socfpga_gen10_ops = { + .set_phy_mode = socfpga_gen10_set_phy_mode, +}; + static const struct of_device_id socfpga_dwmac_match[] = { - { .compatible = "altr,socfpga-stmmac" }, + { .compatible = "altr,socfpga-stmmac", .data = &socfpga_gen5_ops }, + { .compatible = "altr,socfpga-stmmac-a10-s10", .data = &socfpga_gen10_ops }, { } }; MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index a69c34f605b1..2856f3fe5266 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -138,6 +138,20 @@ static const struct emac_variant emac_variant_a64 = { .tx_delay_max = 7, }; +static const struct emac_variant emac_variant_h6 = { + .default_syscon_value = 0x50000, + .syscon_field = &sun8i_syscon_reg_field, + /* The "Internal PHY" of H6 is not on the die. It's on the + * co-packaged AC200 chip instead. + */ + .soc_has_internal_phy = false, + .support_mii = true, + .support_rmii = true, + .support_rgmii = true, + .rx_delay_max = 31, + .tx_delay_max = 7, +}; + #define EMAC_BASIC_CTL0 0x00 #define EMAC_BASIC_CTL1 0x04 #define EMAC_INT_STA 0x08 @@ -275,18 +289,18 @@ static void sun8i_dwmac_dma_init(void __iomem *ioaddr, static void sun8i_dwmac_dma_init_rx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { /* Write RX descriptors address */ - writel(dma_rx_phy, ioaddr + EMAC_RX_DESC_LIST); + writel(lower_32_bits(dma_rx_phy), ioaddr + EMAC_RX_DESC_LIST); } static void sun8i_dwmac_dma_init_tx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { /* Write TX descriptors address */ - writel(dma_tx_phy, ioaddr + EMAC_TX_DESC_LIST); + writel(lower_32_bits(dma_tx_phy), ioaddr + EMAC_TX_DESC_LIST); } /* sun8i_dwmac_dump_regs() - Dump EMAC address space @@ -884,6 +898,11 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) * address. No need to mask it again. */ reg |= 1 << H3_EPHY_ADDR_SHIFT; + } else { + /* For SoCs without internal PHY the PHY selection bit should be + * set to 0 (external PHY). 
+ */ + reg &= ~H3_EPHY_SELECT; } if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { @@ -977,6 +996,18 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) regulator_disable(gmac->regulator); } +static void sun8i_dwmac_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + EMAC_BASIC_CTL0); + + if (enable) + value |= EMAC_LOOPBACK; + else + value &= ~EMAC_LOOPBACK; + + writel(value, ioaddr + EMAC_BASIC_CTL0); +} + static const struct stmmac_ops sun8i_dwmac_ops = { .core_init = sun8i_dwmac_core_init, .set_mac = sun8i_dwmac_set_mac, @@ -986,6 +1017,7 @@ static const struct stmmac_ops sun8i_dwmac_ops = { .flow_ctrl = sun8i_dwmac_flow_ctrl, .set_umac_addr = sun8i_dwmac_set_umac_addr, .get_umac_addr = sun8i_dwmac_get_umac_addr, + .set_mac_loopback = sun8i_dwmac_set_mac_loopback, }; static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) @@ -1203,6 +1235,8 @@ static const struct of_device_id sun8i_dwmac_match[] = { .data = &emac_variant_r40 }, { .compatible = "allwinner,sun50i-a64-emac", .data = &emac_variant_a64 }, + { .compatible = "allwinner,sun50i-h6-emac", + .data = &emac_variant_h6 }, { } }; MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index b83d3a98f5f1..b70d44ac0990 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -136,6 +136,7 @@ enum inter_frame_gap { #define GMAC_FRAME_FILTER_DAIF 0x00000008 /* DA Inverse Filtering */ #define GMAC_FRAME_FILTER_PM 0x00000010 /* Pass all multicast */ #define GMAC_FRAME_FILTER_DBF 0x00000020 /* Disable Broadcast frames */ +#define GMAC_FRAME_FILTER_PCF 0x00000080 /* Pass Control frames */ #define GMAC_FRAME_FILTER_SAIF 0x00000100 /* Inverse Filtering */ #define GMAC_FRAME_FILTER_SAF 0x00000200 /* Source Address Filter */ #define GMAC_FRAME_FILTER_HPF 0x00000400 /* Hash or perfect Filter */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 9fff81170163..3d69da112625 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -162,7 +162,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, memset(mc_filter, 0, sizeof(mc_filter)); if (dev->flags & IFF_PROMISC) { - value = GMAC_FRAME_FILTER_PR; + value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF; } else if (dev->flags & IFF_ALLMULTI) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ } else if (!netdev_mc_empty(dev)) { @@ -188,6 +188,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, } } + value |= GMAC_FRAME_FILTER_HPF; dwmac1000_set_mchash(ioaddr, mc_filter, mcbitslog2); /* Handle multiple unicast addresses (perfect filtering) */ @@ -206,6 +207,12 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, GMAC_ADDR_LOW(reg)); reg++; } + + while (reg <= perfect_addr_number) { + writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); + writel(0, ioaddr + GMAC_ADDR_LOW(reg)); + reg++; + } } #ifdef FRAME_FILTER_DEBUG @@ -489,6 +496,18 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } +static void dwmac1000_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + GMAC_CONTROL); + + if (enable) + value |= GMAC_CONTROL_LM; + else + value &= ~GMAC_CONTROL_LM; + + writel(value, ioaddr + GMAC_CONTROL); +} + const 
struct stmmac_ops dwmac1000_ops = { .core_init = dwmac1000_core_init, .set_mac = stmmac_set_mac, @@ -508,6 +527,7 @@ const struct stmmac_ops dwmac1000_ops = { .pcs_ctrl_ane = dwmac1000_ctrl_ane, .pcs_rane = dwmac1000_rane, .pcs_get_adv_lp = dwmac1000_get_adv_lp, + .set_mac_loopback = dwmac1000_set_mac_loopback, }; int dwmac1000_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 1fdedf77678f..2bac49b49f73 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -112,18 +112,18 @@ static void dwmac1000_dma_init(void __iomem *ioaddr, static void dwmac1000_dma_init_rx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { /* RX descriptor base address list must be written into DMA CSR3 */ - writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR); } static void dwmac1000_dma_init_tx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { /* TX descriptor base address list must be written into DMA CSR4 */ - writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR); } static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 8842f6627cb8..ebcad8dd99db 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -150,6 +150,18 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode) return; } +static void dwmac100_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + MAC_CONTROL); + + if (enable) + value |= MAC_CONTROL_OM; + else + value &= ~MAC_CONTROL_OM; + + writel(value, ioaddr + MAC_CONTROL); +} + const struct stmmac_ops dwmac100_ops = { .core_init = dwmac100_core_init, .set_mac = stmmac_set_mac, @@ -161,6 +173,7 @@ const struct stmmac_ops dwmac100_ops = { .pmt = dwmac100_pmt, .set_umac_addr = dwmac100_set_umac_addr, .get_umac_addr = dwmac100_get_umac_addr, + .set_mac_loopback = dwmac100_set_mac_loopback, }; int dwmac100_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c index c980cc7360a4..8f0d9bc7cab5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c @@ -31,18 +31,18 @@ static void dwmac100_dma_init(void __iomem *ioaddr, static void dwmac100_dma_init_rx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { /* RX descriptor base addr lists must be written into DMA CSR3 */ - writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR); } static void dwmac100_dma_init_tx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { /* TX descriptor base addr lists must be written into DMA CSR4 */ - writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR); } /* Store and Forward capability is not used at all. 
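The dwmac100 and dwmac1000 hunks above, like the sun8i and dwxgmac2 hunks elsewhere in this series, all make the same change: the DMA init callbacks take a dma_addr_t instead of a u32 and write only the relevant 32-bit halves into the descriptor base registers, so 32-bit-only blocks keep working unchanged while 64-bit-capable ones can also program the high word. A minimal sketch of the idiom follows; the EXAMPLE_* register offsets are invented for illustration and do not correspond to any real DWMAC block.

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/types.h>

#define EXAMPLE_RX_BASE_HI	0x18	/* hypothetical high-word register */
#define EXAMPLE_RX_BASE_LO	0x1c	/* hypothetical low-word register */

static void example_dma_init_rx(void __iomem *ioaddr, dma_addr_t rx_phy)
{
	/* upper_32_bits() evaluates to 0 when dma_addr_t is 32 bits wide,
	 * so the same helpers are safe on both configurations; blocks with
	 * no high-word register simply omit the first write.
	 */
	writel(upper_32_bits(rx_phy), ioaddr + EXAMPLE_RX_BASE_HI);
	writel(lower_32_bits(rx_phy), ioaddr + EXAMPLE_RX_BASE_LO);
}
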
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 80234f12bf7f..2ed11a581d80 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -15,8 +15,7 @@ /* MAC registers */ #define GMAC_CONFIG 0x00000000 #define GMAC_PACKET_FILTER 0x00000008 -#define GMAC_HASH_TAB_0_31 0x00000010 -#define GMAC_HASH_TAB_32_63 0x00000014 +#define GMAC_HASH_TAB(x) (0x10 + (x) * 4) #define GMAC_RX_FLOW_CTRL 0x00000090 #define GMAC_QX_TX_FLOW_CTRL(x) (0x70 + x * 4) #define GMAC_TXQ_PRTY_MAP0 0x98 @@ -61,6 +60,8 @@ #define GMAC_PACKET_FILTER_PR BIT(0) #define GMAC_PACKET_FILTER_HMC BIT(2) #define GMAC_PACKET_FILTER_PM BIT(4) +#define GMAC_PACKET_FILTER_PCF BIT(7) +#define GMAC_PACKET_FILTER_HPF BIT(10) #define GMAC_MAX_PERFECT_ADDRESSES 128 @@ -157,6 +158,7 @@ enum power_event { #define GMAC_CONFIG_PS BIT(15) #define GMAC_CONFIG_FES BIT(14) #define GMAC_CONFIG_DM BIT(13) +#define GMAC_CONFIG_LM BIT(12) #define GMAC_CONFIG_DCRS BIT(9) #define GMAC_CONFIG_TE BIT(1) #define GMAC_CONFIG_RE BIT(0) @@ -178,6 +180,7 @@ enum power_event { #define GMAC_HW_FEAT_MIISEL BIT(0) /* MAC HW features1 bitmap */ +#define GMAC_HW_HASH_TB_SZ GENMASK(25, 24) #define GMAC_HW_FEAT_AVSEL BIT(20) #define GMAC_HW_TSOEN BIT(18) #define GMAC_HW_TXFIFOSIZE GENMASK(10, 6) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 99d772517242..01c2e2d83e76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -400,57 +400,74 @@ static void dwmac4_set_filter(struct mac_device_info *hw, struct net_device *dev) { void __iomem *ioaddr = (void __iomem *)dev->base_addr; - unsigned int value = 0; + int numhashregs = (hw->multicast_filter_bins >> 5); + int mcbitslog2 = hw->mcast_bits_log2; + unsigned int value; + int i; + value = readl(ioaddr + GMAC_PACKET_FILTER); + value &= ~GMAC_PACKET_FILTER_HMC; + value &= ~GMAC_PACKET_FILTER_HPF; + value &= ~GMAC_PACKET_FILTER_PCF; + value &= ~GMAC_PACKET_FILTER_PM; + value &= ~GMAC_PACKET_FILTER_PR; if (dev->flags & IFF_PROMISC) { - value = GMAC_PACKET_FILTER_PR; + value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_PCF; } else if ((dev->flags & IFF_ALLMULTI) || - (netdev_mc_count(dev) > HASH_TABLE_SIZE)) { + (netdev_mc_count(dev) > hw->multicast_filter_bins)) { /* Pass all multi */ - value = GMAC_PACKET_FILTER_PM; - /* Set the 64 bits of the HASH tab. To be updated if taller - * hash table is used - */ - writel(0xffffffff, ioaddr + GMAC_HASH_TAB_0_31); - writel(0xffffffff, ioaddr + GMAC_HASH_TAB_32_63); + value |= GMAC_PACKET_FILTER_PM; + /* Set all the bits of the HASH tab */ + for (i = 0; i < numhashregs; i++) + writel(0xffffffff, ioaddr + GMAC_HASH_TAB(i)); } else if (!netdev_mc_empty(dev)) { - u32 mc_filter[2]; struct netdev_hw_addr *ha; + u32 mc_filter[8]; /* Hash filter for multicast */ - value = GMAC_PACKET_FILTER_HMC; + value |= GMAC_PACKET_FILTER_HMC; memset(mc_filter, 0, sizeof(mc_filter)); netdev_for_each_mc_addr(ha, dev) { - /* The upper 6 bits of the calculated CRC are used to - * index the content of the Hash Table Reg 0 and 1. + /* The upper n bits of the calculated CRC are used to + * index the contents of the hash table. The number of + * bits used depends on the hardware configuration + * selected at core configuration time. 
*/ - int bit_nr = - (bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26); - /* The most significant bit determines the register - * to use while the other 5 bits determines the bit - * within the selected register + int bit_nr = bitrev32(~crc32_le(~0, ha->addr, + ETH_ALEN)) >> (32 - mcbitslog2); + /* The most significant bit determines the register to + * use (H/L) while the other 5 bits determine the bit + * within the register. */ - mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1F)); + mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1f)); } - writel(mc_filter[0], ioaddr + GMAC_HASH_TAB_0_31); - writel(mc_filter[1], ioaddr + GMAC_HASH_TAB_32_63); + for (i = 0; i < numhashregs; i++) + writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i)); } + value |= GMAC_PACKET_FILTER_HPF; + /* Handle multiple unicast addresses */ if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) { /* Switch to promiscuous mode if more than 128 addrs * are required */ value |= GMAC_PACKET_FILTER_PR; - } else if (!netdev_uc_empty(dev)) { - int reg = 1; + } else { struct netdev_hw_addr *ha; + int reg = 1; netdev_for_each_uc_addr(ha, dev) { dwmac4_set_umac_addr(hw, ha->addr, reg); reg++; } + + while (reg < GMAC_MAX_PERFECT_ADDRESSES) { + writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); + writel(0, ioaddr + GMAC_ADDR_LOW(reg)); + reg++; + } } writel(value, ioaddr + GMAC_PACKET_FILTER); @@ -468,8 +485,9 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; - writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); } + writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); + if (fc & FLOW_TX) { pr_debug("\tTransmit Flow-Control ON\n"); @@ -477,7 +495,7 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, pr_debug("\tduplex mode: PAUSE %d\n", pause_time); for (queue = 0; queue < tx_cnt; queue++) { - flow |= GMAC_TX_FLOW_CTRL_TFE; + flow = GMAC_TX_FLOW_CTRL_TFE; if (duplex) flow |= @@ -485,6 +503,9 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue)); } + } else { + for (queue = 0; queue < tx_cnt; queue++) + writel(0, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue)); } } @@ -700,6 +721,18 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } +static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + GMAC_CONFIG); + + if (enable) + value |= GMAC_CONFIG_LM; + else + value &= ~GMAC_CONFIG_LM; + + writel(value, ioaddr + GMAC_CONFIG); +} + const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_set_mac, @@ -729,6 +762,7 @@ const struct stmmac_ops dwmac4_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, + .set_mac_loopback = dwmac4_set_mac_loopback, }; const struct stmmac_ops dwmac410_ops = { @@ -760,6 +794,7 @@ const struct stmmac_ops dwmac410_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, + .set_mac_loopback = dwmac4_set_mac_loopback, }; const struct stmmac_ops dwmac510_ops = { @@ -796,6 +831,7 @@ const struct stmmac_ops dwmac510_ops = { .safety_feat_dump = dwmac5_safety_feat_dump, .rxp_config = dwmac5_rxp_config, .flex_pps_config = dwmac5_flex_pps_config, + .set_mac_loopback = dwmac4_set_mac_loopback, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index cf6436d3d6c7..dbde23e7e169 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -443,6 +443,15 @@ static void dwmac4_clear(struct dma_desc *p) p->des3 = 0; } +static int set_16kib_bfsize(int mtu) +{ + int ret = 0; + + if (unlikely(mtu >= BUF_SIZE_8KiB)) + ret = BUF_SIZE_16KiB; + return ret; +} + const struct stmmac_desc_ops dwmac4_desc_ops = { .tx_status = dwmac4_wrback_get_tx_status, .rx_status = dwmac4_wrback_get_rx_status, @@ -469,4 +478,6 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .clear = dwmac4_clear, }; -const struct stmmac_mode_ops dwmac4_ring_mode_ops = { }; +const struct stmmac_mode_ops dwmac4_ring_mode_ops = { + .set_16kib_bfsize = set_16kib_bfsize, +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 0f208e13da9f..3ed5508586ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -70,7 +70,7 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { u32 value; u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl; @@ -79,12 +79,12 @@ static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr, value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT); writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan)); - writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(chan)); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan)); } static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { u32 value; u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; @@ -97,7 +97,7 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr, writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); - writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); } static void dwmac4_dma_init_channel(void __iomem *ioaddr, @@ -351,6 +351,7 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature1 */ hw_cap = readl(ioaddr + GMAC_HW_FEATURE1); + dma_cap->hash_tb_sz = (hw_cap & GMAC_HW_HASH_TB_SZ) >> 24; dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20; dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18; /* RX and TX FIFO sizes are encoded as log2(n / 128). 
Undo that by diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 85826524683c..f2a29a90e085 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -85,10 +85,6 @@ void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan) value &= ~DMA_CONTROL_SR; writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan)); - - value = readl(ioaddr + GMAC_CONFIG); - value &= ~GMAC_CONFIG_RE; - writel(value, ioaddr + GMAC_CONFIG); } void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 085b700a4994..7f86dffb264d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -15,10 +15,14 @@ /* MAC Registers */ #define XGMAC_TX_CONFIG 0x00000000 #define XGMAC_CONFIG_SS_OFF 29 -#define XGMAC_CONFIG_SS_MASK GENMASK(30, 29) +#define XGMAC_CONFIG_SS_MASK GENMASK(31, 29) #define XGMAC_CONFIG_SS_10000 (0x0 << XGMAC_CONFIG_SS_OFF) -#define XGMAC_CONFIG_SS_2500 (0x2 << XGMAC_CONFIG_SS_OFF) -#define XGMAC_CONFIG_SS_1000 (0x3 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_2500_GMII (0x2 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_1000_GMII (0x3 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_100_MII (0x4 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_5000 (0x5 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_2500 (0x6 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_10_MII (0x7 << XGMAC_CONFIG_SS_OFF) #define XGMAC_CONFIG_SARC GENMASK(22, 20) #define XGMAC_CONFIG_SARC_SHIFT 20 #define XGMAC_CONFIG_JD BIT(16) @@ -29,6 +33,7 @@ #define XGMAC_CONFIG_GPSL GENMASK(29, 16) #define XGMAC_CONFIG_GPSL_SHIFT 16 #define XGMAC_CONFIG_S2KP BIT(11) +#define XGMAC_CONFIG_LM BIT(10) #define XGMAC_CONFIG_IPC BIT(9) #define XGMAC_CONFIG_JE BIT(8) #define XGMAC_CONFIG_WD BIT(7) @@ -39,6 +44,7 @@ #define XGMAC_CORE_INIT_RX 0 #define XGMAC_PACKET_FILTER 0x00000008 #define XGMAC_FILTER_RA BIT(31) +#define XGMAC_FILTER_PCF BIT(7) #define XGMAC_FILTER_PM BIT(4) #define XGMAC_FILTER_HMC BIT(2) #define XGMAC_FILTER_PR BIT(0) @@ -81,6 +87,7 @@ #define XGMAC_HWFEAT_GMIISEL BIT(1) #define XGMAC_HW_FEATURE1 0x00000120 #define XGMAC_HWFEAT_TSOEN BIT(18) +#define XGMAC_HWFEAT_ADDR64 GENMASK(15, 14) #define XGMAC_HWFEAT_TXFIFOSIZE GENMASK(10, 6) #define XGMAC_HWFEAT_RXFIFOSIZE GENMASK(4, 0) #define XGMAC_HW_FEATURE2 0x00000124 @@ -166,6 +173,7 @@ #define XGMAC_EN_LPI BIT(15) #define XGMAC_LPI_XIT_PKT BIT(14) #define XGMAC_AAL BIT(12) +#define XGMAC_EAME BIT(11) #define XGMAC_BLEN GENMASK(7, 1) #define XGMAC_BLEN256 BIT(7) #define XGMAC_BLEN128 BIT(6) @@ -175,6 +183,10 @@ #define XGMAC_BLEN8 BIT(2) #define XGMAC_BLEN4 BIT(1) #define XGMAC_UNDEF BIT(0) +#define XGMAC_TX_EDMA_CTRL 0x00003040 +#define XGMAC_TDPS GENMASK(29, 0) +#define XGMAC_RX_EDMA_CTRL 0x00003044 +#define XGMAC_RDPS GENMASK(29, 0) #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x))) #define XGMAC_PBLx8 BIT(16) #define XGMAC_DMA_CH_TX_CONTROL(x) (0x00003104 + (0x80 * (x))) @@ -187,7 +199,9 @@ #define XGMAC_RxPBL GENMASK(21, 16) #define XGMAC_RxPBL_SHIFT 16 #define XGMAC_RXST BIT(0) +#define XGMAC_DMA_CH_TxDESC_HADDR(x) (0x00003110 + (0x80 * (x))) #define XGMAC_DMA_CH_TxDESC_LADDR(x) (0x00003114 + (0x80 * (x))) +#define XGMAC_DMA_CH_RxDESC_HADDR(x) (0x00003118 + (0x80 * (x))) #define XGMAC_DMA_CH_RxDESC_LADDR(x) (0x0000311c + (0x80 * (x))) #define 
XGMAC_DMA_CH_TxDESC_TAIL_LPTR(x) (0x00003124 + (0x80 * (x))) #define XGMAC_DMA_CH_RxDESC_TAIL_LPTR(x) (0x0000312c + (0x80 * (x))) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 64b8cb88ea45..0a32c96a7854 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -36,7 +36,7 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, switch (hw->ps) { case SPEED_10000: - tx |= hw->link.speed10000; + tx |= hw->link.xgmii.speed10000; break; case SPEED_2500: tx |= hw->link.speed2500; @@ -310,7 +310,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, u32 value = XGMAC_FILTER_RA; if (dev->flags & IFF_PROMISC) { - value |= XGMAC_FILTER_PR; + value |= XGMAC_FILTER_PR | XGMAC_FILTER_PCF; } else if ((dev->flags & IFF_ALLMULTI) || (netdev_mc_count(dev) > HASH_TABLE_SIZE)) { value |= XGMAC_FILTER_PM; @@ -321,6 +321,18 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, writel(value, ioaddr + XGMAC_PACKET_FILTER); } +static void dwxgmac2_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + XGMAC_RX_CONFIG); + + if (enable) + value |= XGMAC_CONFIG_LM; + else + value &= ~XGMAC_CONFIG_LM; + + writel(value, ioaddr + XGMAC_RX_CONFIG); +} + const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, .set_mac = dwxgmac2_set_mac, @@ -350,6 +362,7 @@ const struct stmmac_ops dwxgmac210_ops = { .pcs_get_adv_lp = NULL, .debug = NULL, .set_filter = dwxgmac2_set_filter, + .set_mac_loopback = dwxgmac2_set_mac_loopback, }; int dwxgmac2_setup(struct stmmac_priv *priv) @@ -368,11 +381,13 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); mac->link.duplex = 0; - mac->link.speed10 = 0; - mac->link.speed100 = 0; - mac->link.speed1000 = XGMAC_CONFIG_SS_1000; - mac->link.speed2500 = XGMAC_CONFIG_SS_2500; - mac->link.speed10000 = XGMAC_CONFIG_SS_10000; + mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; + mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; + mac->link.speed1000 = XGMAC_CONFIG_SS_1000_GMII; + mac->link.speed2500 = XGMAC_CONFIG_SS_2500_GMII; + mac->link.xgmii.speed2500 = XGMAC_CONFIG_SS_2500; + mac->link.xgmii.speed5000 = XGMAC_CONFIG_SS_5000; + mac->link.xgmii.speed10000 = XGMAC_CONFIG_SS_10000; mac->link.speed_mask = XGMAC_CONFIG_SS_MASK; mac->mii.addr = XGMAC_MDIO_ADDR; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 98fa471da7c0..c4c45402b8f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -242,8 +242,8 @@ static void dwxgmac2_get_addr(struct dma_desc *p, unsigned int *addr) static void dwxgmac2_set_addr(struct dma_desc *p, dma_addr_t addr) { - p->des0 = cpu_to_le32(addr); - p->des1 = 0; + p->des0 = cpu_to_le32(lower_32_bits(addr)); + p->des1 = cpu_to_le32(upper_32_bits(addr)); } static void dwxgmac2_clear(struct dma_desc *p) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index e79037f511e1..a4f236e3593e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -27,7 +27,7 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr, if (dma_cfg->aal) value |= XGMAC_AAL; - writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); + writel(value | XGMAC_EAME, ioaddr + 
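
A minimal sketch of the 64-bit addressing idiom these XGMAC hunks adopt: with XGMAC_EAME (Enhanced Address Mode Enable) set in the sysbus-mode register, the DMA consumes 64-bit descriptor and ring-base addresses, so every programming site splits the dma_addr_t across a HADDR/LADDR register pair. On a 32-bit mapping upper_32_bits() is simply 0 and the high write is harmless (the offsets are parameters here, standing in for the new *_HADDR/*_LADDR defines):

#include <linux/io.h>
#include <linux/kernel.h>	/* upper_32_bits() / lower_32_bits() */

static void sketch_write_ring_base(void __iomem *ioaddr, dma_addr_t phy,
				   u32 haddr_off, u32 laddr_off)
{
	writel(upper_32_bits(phy), ioaddr + haddr_off);
	writel(lower_32_bits(phy), ioaddr + laddr_off);
}
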
XGMAC_DMA_SYSBUS_MODE); } static void dwxgmac2_dma_init_chan(void __iomem *ioaddr, @@ -44,7 +44,7 @@ static void dwxgmac2_dma_init_chan(void __iomem *ioaddr, static void dwxgmac2_dma_init_rx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t phy, u32 chan) { u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl; u32 value; @@ -54,12 +54,13 @@ static void dwxgmac2_dma_init_rx_chan(void __iomem *ioaddr, value |= (rxpbl << XGMAC_RxPBL_SHIFT) & XGMAC_RxPBL; writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); - writel(dma_rx_phy, ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); + writel(upper_32_bits(phy), ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); + writel(lower_32_bits(phy), ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); } static void dwxgmac2_dma_init_tx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t phy, u32 chan) { u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; u32 value; @@ -70,7 +71,8 @@ static void dwxgmac2_dma_init_tx_chan(void __iomem *ioaddr, value |= XGMAC_OSP; writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); - writel(dma_tx_phy, ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); + writel(upper_32_bits(phy), ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); + writel(lower_32_bits(phy), ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); } static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) @@ -91,11 +93,11 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) value |= (axi->axi_rd_osr_lmt << XGMAC_RD_OSR_LMT_SHIFT) & XGMAC_RD_OSR_LMT; + if (!axi->axi_fb) + value |= XGMAC_UNDEF; + value &= ~XGMAC_BLEN; for (i = 0; i < AXI_BLEN; i++) { - if (axi->axi_blen[i]) - value &= ~XGMAC_UNDEF; - switch (axi->axi_blen[i]) { case 256: value |= XGMAC_BLEN256; @@ -122,6 +124,8 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) } writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); + writel(XGMAC_TDPS, ioaddr + XGMAC_TX_EDMA_CTRL); + writel(XGMAC_RDPS, ioaddr + XGMAC_RX_EDMA_CTRL); } static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode, @@ -299,10 +303,6 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan) value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); value &= ~XGMAC_RXST; writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); - - value = readl(ioaddr + XGMAC_RX_CONFIG); - value &= ~XGMAC_CONFIG_RE; - writel(value, ioaddr + XGMAC_RX_CONFIG); } static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, @@ -363,6 +363,23 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18; + + dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14; + switch (dma_cap->addr64) { + case 0: + dma_cap->addr64 = 32; + break; + case 1: + dma_cap->addr64 = 40; + break; + case 2: + dma_cap->addr64 = 48; + break; + default: + dma_cap->addr64 = 32; + break; + } + dma_cap->tx_fifo_size = 128 << ((hw_cap & XGMAC_HWFEAT_TXFIFOSIZE) >> 6); dma_cap->rx_fifo_size = diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 81b966a8261b..6c61b753b55e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -81,6 +81,7 @@ static const struct stmmac_hwif_entry { const void *hwtimestamp; const void *mode; const void *tc; + const void *mmc; int (*setup)(struct stmmac_priv *priv); int (*quirks)(struct stmmac_priv *priv); } stmmac_hw[] = { @@ -100,6 +101,7 
@@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = NULL, + .mmc = &dwmac_mmc_ops, .setup = dwmac100_setup, .quirks = stmmac_dwmac1_quirks, }, { @@ -117,6 +119,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = NULL, + .mmc = &dwmac_mmc_ops, .setup = dwmac1000_setup, .quirks = stmmac_dwmac1_quirks, }, { @@ -134,6 +137,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = stmmac_dwmac4_quirks, }, { @@ -151,6 +155,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = &dwmac4_ring_mode_ops, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = NULL, }, { @@ -168,6 +173,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = &dwmac4_ring_mode_ops, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = NULL, }, { @@ -185,6 +191,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = &dwmac4_ring_mode_ops, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = NULL, }, { @@ -202,6 +209,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = &dwmac510_tc_ops, + .mmc = NULL, .setup = dwxgmac2_setup, .quirks = NULL, }, @@ -267,6 +275,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv) mac->ptp = mac->ptp ? : entry->hwtimestamp; mac->mode = mac->mode ? : entry->mode; mac->tc = mac->tc ? : entry->tc; + mac->mmc = mac->mmc ? : entry->mmc; priv->hw = mac; priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 5bb00234d961..278c0dbec9d9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -6,6 +6,7 @@ #define __STMMAC_HWIF_H__ #include <linux/netdevice.h> +#include <linux/stmmac.h> #define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) \ ({ \ @@ -149,10 +150,10 @@ struct stmmac_dma_ops { struct stmmac_dma_cfg *dma_cfg, u32 chan); void (*init_rx_chan)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan); + dma_addr_t phy, u32 chan); void (*init_tx_chan)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan); + dma_addr_t phy, u32 chan); /* Configure the AXI Bus Mode Register */ void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi); /* Dump DMA registers */ @@ -324,6 +325,8 @@ struct stmmac_ops { int (*flex_pps_config)(void __iomem *ioaddr, int index, struct stmmac_pps_cfg *cfg, bool enable, u32 sub_second_inc, u32 systime_flags); + /* Loopback for selftests */ + void (*set_mac_loopback)(void __iomem *ioaddr, bool enable); }; #define stmmac_core_init(__priv, __args...) \ @@ -392,6 +395,8 @@ struct stmmac_ops { stmmac_do_callback(__priv, mac, rxp_config, __args) #define stmmac_flex_pps_config(__priv, __args...) \ stmmac_do_callback(__priv, mac, flex_pps_config, __args) +#define stmmac_set_mac_loopback(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_mac_loopback, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -464,6 +469,21 @@ struct stmmac_tc_ops { #define stmmac_tc_setup_cbs(__priv, __args...) 
\ stmmac_do_callback(__priv, tc, setup_cbs, __args) +struct stmmac_counters; + +struct stmmac_mmc_ops { + void (*ctrl)(void __iomem *ioaddr, unsigned int mode); + void (*intr_all_mask)(void __iomem *ioaddr); + void (*read)(void __iomem *ioaddr, struct stmmac_counters *mmc); +}; + +#define stmmac_mmc_ctrl(__priv, __args...) \ + stmmac_do_void_callback(__priv, mmc, ctrl, __args) +#define stmmac_mmc_intr_all_mask(__priv, __args...) \ + stmmac_do_void_callback(__priv, mmc, intr_all_mask, __args) +#define stmmac_mmc_read(__priv, __args...) \ + stmmac_do_void_callback(__priv, mmc, read, __args) + struct stmmac_regs_off { u32 ptp_off; u32 mmc_off; @@ -482,6 +502,7 @@ extern const struct stmmac_tc_ops dwmac510_tc_ops; extern const struct stmmac_ops dwxgmac210_ops; extern const struct stmmac_dma_ops dwxgmac210_dma_ops; extern const struct stmmac_desc_ops dwxgmac210_desc_ops; +extern const struct stmmac_mmc_ops dwmac_mmc_ops; #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */ diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h index 6c8fdee3b25a..3587ceb9faf5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc.h +++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h @@ -118,8 +118,4 @@ struct stmmac_counters { unsigned int mmc_rx_icmp_err_octets; }; -void dwmac_mmc_ctrl(void __iomem *ioaddr, unsigned int mode); -void dwmac_mmc_intr_all_mask(void __iomem *ioaddr); -void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc); - #endif /* __MMC_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 1d967b8f91a0..a471db6d7b11 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/io.h> +#include "hwif.h" #include "mmc.h" /* MAC Management Counters register offset */ @@ -118,7 +119,7 @@ #define MMC_RX_ICMP_GD_OCTETS 0x180 #define MMC_RX_ICMP_ERR_OCTETS 0x184 -void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) +static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) { u32 value = readl(mmcaddr + MMC_CNTRL); @@ -131,7 +132,7 @@ void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) } /* To mask all all interrupts.*/ -void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr) +static void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr) { writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK); writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK); @@ -143,7 +144,7 @@ void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr) * counter after a read. So all the field of the mmc struct * have to be incremented. 
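
Moving the MMC accessors behind struct stmmac_mmc_ops puts them on the same footing as the other hwif modules: each stmmac_mmc_*() wrapper expands to a NULL-checked indirect call, so a core that leaves .mmc unset (the XGMAC entry in the table above) silently skips counter handling instead of crashing. Roughly what stmmac_do_void_callback() produces for stmmac_mmc_read() (a sketch, not the literal macro expansion):

static inline int sketch_mmc_read(struct stmmac_priv *priv,
				  void __iomem *mmcaddr,
				  struct stmmac_counters *counters)
{
	if (!priv->hw->mmc || !priv->hw->mmc->read)
		return -EINVAL;

	priv->hw->mmc->read(mmcaddr, counters);
	return 0;
}
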
*/ -void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) +static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) { mmc->mmc_tx_octetcount_gb += readl(mmcaddr + MMC_TX_OCTETCOUNT_GB); mmc->mmc_tx_framecount_gb += readl(mmcaddr + MMC_TX_FRAMECOUNT_GB); @@ -256,3 +257,9 @@ void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS); mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS); } + +const struct stmmac_mmc_ops dwmac_mmc_ops = { + .ctrl = dwmac_mmc_ctrl, + .intr_all_mask = dwmac_mmc_intr_all_mask, + .read = dwmac_mmc_read, +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 62a64356ad22..5cd966c154f3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -14,12 +14,13 @@ #include <linux/clk.h> #include <linux/stmmac.h> -#include <linux/phy.h> +#include <linux/phylink.h> #include <linux/pci.h> #include "common.h" #include <linux/ptp_clock_kernel.h> #include <linux/net_tstamp.h> #include <linux/reset.h> +#include <net/page_pool.h> struct stmmac_resources { void __iomem *addr; @@ -54,13 +55,19 @@ struct stmmac_tx_queue { u32 mss; }; +struct stmmac_rx_buffer { + struct page *page; + dma_addr_t addr; +}; + struct stmmac_rx_queue { + u32 rx_count_frames; u32 queue_index; + struct page_pool *page_pool; + struct stmmac_rx_buffer *buf_pool; struct stmmac_priv *priv_data; struct dma_extended_desc *dma_erx; struct dma_desc *dma_rx ____cacheline_aligned_in_smp; - struct sk_buff **rx_skbuff; - dma_addr_t *rx_skbuff_dma; unsigned int cur_rx; unsigned int dirty_rx; u32 rx_zeroc_thresh; @@ -110,6 +117,7 @@ struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames; u32 tx_coal_timer; + u32 rx_coal_frames; int tx_coalesce; int hwts_tx_en; @@ -137,14 +145,15 @@ struct stmmac_priv { /* Generic channel for NAPI */ struct stmmac_channel channel[STMMAC_CH_MAX]; - bool oldlink; int speed; - int oldduplex; unsigned int flow_ctrl; unsigned int pause; struct mii_bus *mii; int mii_irq[PHY_MAX_ADDR]; + struct phylink_config phylink_config; + struct phylink *phylink; + struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp; struct stmmac_safety_stats sstats; struct plat_stmmacenet_data *plat; @@ -219,4 +228,26 @@ int stmmac_dvr_probe(struct device *device, void stmmac_disable_eee_mode(struct stmmac_priv *priv); bool stmmac_eee_init(struct stmmac_priv *priv); +#if IS_ENABLED(CONFIG_STMMAC_SELFTESTS) +void stmmac_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf); +void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data); +int stmmac_selftest_get_count(struct stmmac_priv *priv); +#else +static inline void stmmac_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + /* Not enabled */ +} +static inline void stmmac_selftest_get_strings(struct stmmac_priv *priv, + u8 *data) +{ + /* Not enabled */ +} +static inline int stmmac_selftest_get_count(struct stmmac_priv *priv) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_STMMAC_SELFTESTS */ + #endif /* __STMMAC_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index e7af3dc3dd8f..6efb66820d4c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -12,7 
+12,7 @@ #include <linux/ethtool.h> #include <linux/interrupt.h> #include <linux/mii.h> -#include <linux/phy.h> +#include <linux/phylink.h> #include <linux/net_tstamp.h> #include <asm/io.h> @@ -264,7 +264,6 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phy = dev->phydev; if (priv->hw->pcs & STMMAC_PCS_RGMII || priv->hw->pcs & STMMAC_PCS_SGMII) { @@ -343,18 +342,7 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, return 0; } - if (phy == NULL) { - pr_err("%s: %s: PHY is not registered\n", - __func__, dev->name); - return -ENODEV; - } - if (!netif_running(dev)) { - pr_err("%s: interface is disabled: we cannot track " - "link speed / duplex setting\n", dev->name); - return -EBUSY; - } - phy_ethtool_ksettings_get(phy, cmd); - return 0; + return phylink_ethtool_ksettings_get(priv->phylink, cmd); } static int @@ -362,8 +350,6 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phy = dev->phydev; - int rc; if (priv->hw->pcs & STMMAC_PCS_RGMII || priv->hw->pcs & STMMAC_PCS_SGMII) { @@ -387,9 +373,7 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev, return 0; } - rc = phy_ethtool_ksettings_set(phy, cmd); - - return rc; + return phylink_ethtool_ksettings_set(priv->phylink, cmd); } static u32 stmmac_ethtool_getmsglevel(struct net_device *dev) @@ -433,6 +417,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev, NUM_DWMAC1000_DMA_REGS * 4); } +static int stmmac_nway_reset(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + return phylink_ethtool_nway_reset(priv->phylink); +} + static void stmmac_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) @@ -440,28 +431,13 @@ stmmac_get_pauseparam(struct net_device *netdev, struct stmmac_priv *priv = netdev_priv(netdev); struct rgmii_adv adv_lp; - pause->rx_pause = 0; - pause->tx_pause = 0; - if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) { pause->autoneg = 1; if (!adv_lp.pause) return; } else { - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - netdev->phydev->supported) || - !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - netdev->phydev->supported)) - return; + phylink_ethtool_get_pauseparam(priv->phylink, pause); } - - pause->autoneg = netdev->phydev->autoneg; - - if (priv->flow_ctrl & FLOW_RX) - pause->rx_pause = 1; - if (priv->flow_ctrl & FLOW_TX) - pause->tx_pause = 1; - } static int @@ -469,39 +445,16 @@ stmmac_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct stmmac_priv *priv = netdev_priv(netdev); - u32 tx_cnt = priv->plat->tx_queues_to_use; - struct phy_device *phy = netdev->phydev; - int new_pause = FLOW_OFF; struct rgmii_adv adv_lp; if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) { pause->autoneg = 1; if (!adv_lp.pause) return -EOPNOTSUPP; + return 0; } else { - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phy->supported) || - !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phy->supported)) - return -EOPNOTSUPP; - } - - if (pause->rx_pause) - new_pause |= FLOW_RX; - if (pause->tx_pause) - new_pause |= FLOW_TX; - - priv->flow_ctrl = new_pause; - phy->autoneg = pause->autoneg; - - if (phy->autoneg) { - if (netif_running(netdev)) - return phy_start_aneg(phy); + return 
phylink_ethtool_set_pauseparam(priv->phylink, pause); } - - stmmac_flow_ctrl(priv, priv->hw, phy->duplex, priv->flow_ctrl, - priv->pause, tx_cnt); - return 0; } static void stmmac_get_ethtool_stats(struct net_device *dev, @@ -527,7 +480,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, if (ret) { /* If supported, for new GMAC chips expose the MMC counters */ if (priv->dma_cap.rmon) { - dwmac_mmc_read(priv->mmcaddr, &priv->mmc); + stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc); for (i = 0; i < STMMAC_MMC_STATS_LEN; i++) { char *p; @@ -539,7 +492,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, } } if (priv->eee_enabled) { - int val = phy_get_eee_err(dev->phydev); + int val = phylink_get_eee_err(priv->phylink); if (val) priv->xstats.phy_eee_wakeup_error_n = val; } @@ -579,6 +532,8 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset) } return len; + case ETH_SS_TEST: + return stmmac_selftest_get_count(priv); default: return -EOPNOTSUPP; } @@ -615,6 +570,9 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } break; + case ETH_SS_TEST: + stmmac_selftest_get_strings(priv, p); + break; default: WARN_ON(1); break; @@ -679,7 +637,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, edata->eee_active = priv->eee_active; edata->tx_lpi_timer = priv->tx_lpi_timer; - return phy_ethtool_get_eee(dev->phydev, edata); + return phylink_ethtool_get_eee(priv->phylink, edata); } static int stmmac_ethtool_op_set_eee(struct net_device *dev, @@ -700,7 +658,7 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, return -EOPNOTSUPP; } - ret = phy_ethtool_set_eee(dev->phydev, edata); + ret = phylink_ethtool_set_eee(priv->phylink, edata); if (ret) return ret; @@ -743,8 +701,10 @@ static int stmmac_get_coalesce(struct net_device *dev, ec->tx_coalesce_usecs = priv->tx_coal_timer; ec->tx_max_coalesced_frames = priv->tx_coal_frames; - if (priv->use_riwt) + if (priv->use_riwt) { + ec->rx_max_coalesced_frames = priv->rx_coal_frames; ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt, priv); + } return 0; } @@ -757,7 +717,7 @@ static int stmmac_set_coalesce(struct net_device *dev, unsigned int rx_riwt; /* Check not supported parameters */ - if ((ec->rx_max_coalesced_frames) || (ec->rx_coalesce_usecs_irq) || + if ((ec->rx_coalesce_usecs_irq) || (ec->rx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) || (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) || (ec->pkt_rate_low) || (ec->rx_coalesce_usecs_low) || @@ -791,6 +751,7 @@ static int stmmac_set_coalesce(struct net_device *dev, /* Only copy relevant parameters, ignore all others. 
*/ priv->tx_coal_frames = ec->tx_max_coalesced_frames; priv->tx_coal_timer = ec->tx_coalesce_usecs; + priv->rx_coal_frames = ec->rx_max_coalesced_frames; priv->rx_riwt = rx_riwt; stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); @@ -877,9 +838,10 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .get_regs = stmmac_ethtool_gregs, .get_regs_len = stmmac_ethtool_get_regs_len, .get_link = ethtool_op_get_link, - .nway_reset = phy_ethtool_nway_reset, + .nway_reset = stmmac_nway_reset, .get_pauseparam = stmmac_get_pauseparam, .set_pauseparam = stmmac_set_pauseparam, + .self_test = stmmac_selftest_run, .get_ethtool_stats = stmmac_get_ethtool_stats, .get_strings = stmmac_get_strings, .get_wol = stmmac_get_wol, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 06358fe5b245..c7c9e5f162e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -35,6 +35,7 @@ #include <linux/seq_file.h> #endif /* CONFIG_DEBUG_FS */ #include <linux/net_tstamp.h> +#include <linux/phylink.h> #include <net/pkt_cls.h> #include "stmmac_ptp.h" #include "stmmac.h" @@ -318,21 +319,6 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) } /** - * stmmac_hw_fix_mac_speed - callback for speed selection - * @priv: driver private structure - * Description: on some platforms (e.g. ST), some HW system configuration - * registers have to be set according to the link speed negotiated. - */ -static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv) -{ - struct net_device *ndev = priv->dev; - struct phy_device *phydev = ndev->phydev; - - if (likely(priv->plat->fix_mac_speed)) - priv->plat->fix_mac_speed(priv->plat->bsp_priv, phydev->speed); -} - -/** * stmmac_enable_eee_mode - check and enter in LPI mode * @priv: driver private structure * Description: this function is to verify and enter in LPI mode in case of @@ -395,14 +381,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) */ bool stmmac_eee_init(struct stmmac_priv *priv) { - struct net_device *ndev = priv->dev; - int interface = priv->plat->interface; - bool ret = false; - - if ((interface != PHY_INTERFACE_MODE_MII) && - (interface != PHY_INTERFACE_MODE_GMII) && - !phy_interface_mode_is_rgmii(interface)) - goto out; + int tx_lpi_timer = priv->tx_lpi_timer; /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. @@ -410,52 +389,35 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if ((priv->hw->pcs == STMMAC_PCS_RGMII) || (priv->hw->pcs == STMMAC_PCS_TBI) || (priv->hw->pcs == STMMAC_PCS_RTBI)) - goto out; - - /* MAC core supports the EEE feature. */ - if (priv->dma_cap.eee) { - int tx_lpi_timer = priv->tx_lpi_timer; - - /* Check if the PHY supports EEE */ - if (phy_init_eee(ndev->phydev, 1)) { - /* To manage at run-time if the EEE cannot be supported - * anymore (for example because the lp caps have been - * changed). - * In that case the driver disable own timers. 
- */ - mutex_lock(&priv->lock); - if (priv->eee_active) { - netdev_dbg(priv->dev, "disable EEE\n"); - del_timer_sync(&priv->eee_ctrl_timer); - stmmac_set_eee_timer(priv, priv->hw, 0, - tx_lpi_timer); - } - priv->eee_active = 0; - mutex_unlock(&priv->lock); - goto out; - } - /* Activate the EEE and start timers */ - mutex_lock(&priv->lock); - if (!priv->eee_active) { - priv->eee_active = 1; - timer_setup(&priv->eee_ctrl_timer, - stmmac_eee_ctrl_timer, 0); - mod_timer(&priv->eee_ctrl_timer, - STMMAC_LPI_T(eee_timer)); - - stmmac_set_eee_timer(priv, priv->hw, - STMMAC_DEFAULT_LIT_LS, tx_lpi_timer); - } - /* Set HW EEE according to the speed */ - stmmac_set_eee_pls(priv, priv->hw, ndev->phydev->link); + return false; + + /* Check if MAC core supports the EEE feature. */ + if (!priv->dma_cap.eee) + return false; - ret = true; + mutex_lock(&priv->lock); + + /* Check if it needs to be deactivated */ + if (!priv->eee_active) { + if (priv->eee_enabled) { + netdev_dbg(priv->dev, "disable EEE\n"); + del_timer_sync(&priv->eee_ctrl_timer); + stmmac_set_eee_timer(priv, priv->hw, 0, tx_lpi_timer); + } mutex_unlock(&priv->lock); + return false; + } - netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); + if (priv->eee_active && !priv->eee_enabled) { + timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0); + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, + tx_lpi_timer); } -out: - return ret; + + mutex_unlock(&priv->lock); + netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); + return true; } /* stmmac_get_tx_hwtstamp - get HW TX timestamps @@ -838,97 +800,171 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) priv->pause, tx_cnt); } -/** - * stmmac_adjust_link - adjusts the link parameters - * @dev: net device structure - * Description: this is the helper called by the physical abstraction layer - * drivers to communicate the phy link status. According the speed and duplex - * this driver can invoke registered glue-logic as well. - * It also invoke the eee initialization because it could happen when switch - * on different networks (that are eee capable). 
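
The rewritten stmmac_eee_init() is now a pure "converge to priv->eee_active" helper: the phylink link callbacks set the target state, and the function arms or cancels the LPI timer to match, returning whether EEE ended up enabled. Condensed flow under that reading (field and helper names as in the patch):

/* mac_link_down(): stop LPI */
priv->eee_active = false;
stmmac_eee_init(priv);		/* cancels eee_ctrl_timer, LPI timer off */

/* mac_link_up(): probe the PHY, then arm */
priv->eee_active = phy_init_eee(phy, 1) >= 0;
priv->eee_enabled = stmmac_eee_init(priv);	/* arms the LPI timer */
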
- */ -static void stmmac_adjust_link(struct net_device *dev) +static void stmmac_validate(struct phylink_config *config, + unsigned long *supported, + struct phylink_link_state *state) { - struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - bool new_state = false; - - if (!phydev) - return; + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, }; + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + int tx_cnt = priv->plat->tx_queues_to_use; + int max_speed = priv->plat->max_speed; - mutex_lock(&priv->lock); + phylink_set(mac_supported, 10baseT_Half); + phylink_set(mac_supported, 10baseT_Full); + phylink_set(mac_supported, 100baseT_Half); + phylink_set(mac_supported, 100baseT_Full); + + phylink_set(mac_supported, Autoneg); + phylink_set(mac_supported, Pause); + phylink_set(mac_supported, Asym_Pause); + phylink_set_port_modes(mac_supported); + + if (priv->plat->has_gmac || + priv->plat->has_gmac4 || + priv->plat->has_xgmac) { + phylink_set(mac_supported, 1000baseT_Half); + phylink_set(mac_supported, 1000baseT_Full); + phylink_set(mac_supported, 1000baseKX_Full); + } + + /* Cut down 1G if asked to */ + if ((max_speed > 0) && (max_speed < 1000)) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } else if (priv->plat->has_xgmac) { + phylink_set(mac_supported, 2500baseT_Full); + phylink_set(mac_supported, 5000baseT_Full); + phylink_set(mac_supported, 10000baseSR_Full); + phylink_set(mac_supported, 10000baseLR_Full); + phylink_set(mac_supported, 10000baseER_Full); + phylink_set(mac_supported, 10000baseLRM_Full); + phylink_set(mac_supported, 10000baseT_Full); + phylink_set(mac_supported, 10000baseKX4_Full); + phylink_set(mac_supported, 10000baseKR_Full); + } + + /* Half-Duplex can only work with single queue */ + if (tx_cnt > 1) { + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 1000baseT_Half); + } + + bitmap_and(supported, supported, mac_supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_andnot(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mac_supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_andnot(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} - if (phydev->link) { - u32 ctrl = readl(priv->ioaddr + MAC_CTRL_REG); +static int stmmac_mac_link_state(struct phylink_config *config, + struct phylink_link_state *state) +{ + return -EOPNOTSUPP; +} - /* Now we make sure that we can be in full duplex mode. - * If not, we operate in half-duplex mode. 
*/ - if (phydev->duplex != priv->oldduplex) { - new_state = true; - if (!phydev->duplex) - ctrl &= ~priv->hw->link.duplex; - else - ctrl |= priv->hw->link.duplex; - priv->oldduplex = phydev->duplex; - } - /* Flow Control operation */ - if (phydev->pause) - stmmac_mac_flow_ctrl(priv, phydev->duplex); - - if (phydev->speed != priv->speed) { - new_state = true; - ctrl &= ~priv->hw->link.speed_mask; - switch (phydev->speed) { - case SPEED_1000: - ctrl |= priv->hw->link.speed1000; - break; - case SPEED_100: - ctrl |= priv->hw->link.speed100; - break; - case SPEED_10: - ctrl |= priv->hw->link.speed10; - break; - default: - netif_warn(priv, link, priv->dev, - "broken speed: %d\n", phydev->speed); - phydev->speed = SPEED_UNKNOWN; - break; - } - if (phydev->speed != SPEED_UNKNOWN) - stmmac_hw_fix_mac_speed(priv); - priv->speed = phydev->speed; - } +static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + u32 ctrl; - writel(ctrl, priv->ioaddr + MAC_CTRL_REG); + ctrl = readl(priv->ioaddr + MAC_CTRL_REG); + ctrl &= ~priv->hw->link.speed_mask; - if (!priv->oldlink) { - new_state = true; - priv->oldlink = true; + if (state->interface == PHY_INTERFACE_MODE_USXGMII) { + switch (state->speed) { + case SPEED_10000: + ctrl |= priv->hw->link.xgmii.speed10000; + break; + case SPEED_5000: + ctrl |= priv->hw->link.xgmii.speed5000; + break; + case SPEED_2500: + ctrl |= priv->hw->link.xgmii.speed2500; + break; + default: + return; + } + } else { + switch (state->speed) { + case SPEED_2500: + ctrl |= priv->hw->link.speed2500; + break; + case SPEED_1000: + ctrl |= priv->hw->link.speed1000; + break; + case SPEED_100: + ctrl |= priv->hw->link.speed100; + break; + case SPEED_10: + ctrl |= priv->hw->link.speed10; + break; + default: + return; } - } else if (priv->oldlink) { - new_state = true; - priv->oldlink = false; - priv->speed = SPEED_UNKNOWN; - priv->oldduplex = DUPLEX_UNKNOWN; } - if (new_state && netif_msg_link(priv)) - phy_print_status(phydev); + priv->speed = state->speed; - mutex_unlock(&priv->lock); + if (priv->plat->fix_mac_speed) + priv->plat->fix_mac_speed(priv->plat->bsp_priv, state->speed); - if (phydev->is_pseudo_fixed_link) - /* Stop PHY layer to call the hook to adjust the link in case - * of a switch is attached to the stmmac driver. - */ - phydev->irq = PHY_IGNORE_INTERRUPT; + if (!state->duplex) + ctrl &= ~priv->hw->link.duplex; else - /* At this stage, init the EEE if supported. - * Never called in case of fixed_link. 
- */ + ctrl |= priv->hw->link.duplex; + + /* Flow Control operation */ + if (state->pause) + stmmac_mac_flow_ctrl(priv, state->duplex); + + writel(ctrl, priv->ioaddr + MAC_CTRL_REG); +} + +static void stmmac_mac_an_restart(struct phylink_config *config) +{ + /* Not Supported */ +} + +static void stmmac_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + + stmmac_mac_set(priv, priv->ioaddr, false); + priv->eee_active = false; + stmmac_eee_init(priv); + stmmac_set_eee_pls(priv, priv->hw, false); +} + +static void stmmac_mac_link_up(struct phylink_config *config, + unsigned int mode, phy_interface_t interface, + struct phy_device *phy) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + + stmmac_mac_set(priv, priv->ioaddr, true); + if (phy && priv->dma_cap.eee) { + priv->eee_active = phy_init_eee(phy, 1) >= 0; priv->eee_enabled = stmmac_eee_init(priv); + stmmac_set_eee_pls(priv, priv->hw, true); + } } +static const struct phylink_mac_ops stmmac_phylink_mac_ops = { + .validate = stmmac_validate, + .mac_link_state = stmmac_mac_link_state, + .mac_config = stmmac_mac_config, + .mac_an_restart = stmmac_mac_an_restart, + .mac_link_down = stmmac_mac_link_down, + .mac_link_up = stmmac_mac_link_up, +}; + /** * stmmac_check_pcs_mode - verify if RGMII/SGMII is supported * @priv: driver private structure @@ -965,79 +1001,48 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 tx_cnt = priv->plat->tx_queues_to_use; - struct phy_device *phydev; - char phy_id_fmt[MII_BUS_ID_SIZE + 3]; - char bus_id[MII_BUS_ID_SIZE]; - int interface = priv->plat->interface; - int max_speed = priv->plat->max_speed; - priv->oldlink = false; - priv->speed = SPEED_UNKNOWN; - priv->oldduplex = DUPLEX_UNKNOWN; + struct device_node *node; + int ret; - if (priv->plat->phy_node) { - phydev = of_phy_connect(dev, priv->plat->phy_node, - &stmmac_adjust_link, 0, interface); - } else { - snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", - priv->plat->bus_id); + node = priv->plat->phylink_node; - snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, - priv->plat->phy_addr); - netdev_dbg(priv->dev, "%s: trying to attach to %s\n", __func__, - phy_id_fmt); + if (node) + ret = phylink_of_phy_connect(priv->phylink, node, 0); - phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, - interface); - } + /* Some DT bindings do not set-up the PHY handle. 
Let's try to + * manually parse it + */ + if (!node || ret) { + int addr = priv->plat->phy_addr; + struct phy_device *phydev; - if (IS_ERR_OR_NULL(phydev)) { - netdev_err(priv->dev, "Could not attach to PHY\n"); - if (!phydev) + phydev = mdiobus_get_phy(priv->mii, addr); + if (!phydev) { + netdev_err(priv->dev, "no phy at addr %d\n", addr); return -ENODEV; + } - return PTR_ERR(phydev); + ret = phylink_connect_phy(priv->phylink, phydev); } - /* Stop Advertising 1000BASE Capability if interface is not GMII */ - if ((interface == PHY_INTERFACE_MODE_MII) || - (interface == PHY_INTERFACE_MODE_RMII) || - (max_speed < 1000 && max_speed > 0)) - phy_set_max_speed(phydev, SPEED_100); + return ret; +} - /* - * Half-duplex mode not supported with multiqueue - * half-duplex can only works with single queue - */ - if (tx_cnt > 1) { - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_10baseT_Half_BIT); - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_100baseT_Half_BIT); - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_1000baseT_Half_BIT); - } +static int stmmac_phy_setup(struct stmmac_priv *priv) +{ + struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node); + int mode = priv->plat->interface; + struct phylink *phylink; - /* - * Broken HW is sometimes missing the pull-up resistor on the - * MDIO line, which results in reads to non-existent devices returning - * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent - * device as well. - * Note: phydev->phy_id is the result of reading the UID PHY registers. - */ - if (!priv->plat->phy_node && phydev->phy_id == 0) { - phy_disconnect(phydev); - return -ENODEV; - } + priv->phylink_config.dev = &priv->dev->dev; + priv->phylink_config.type = PHYLINK_NETDEV; - /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid - * subsequent PHY polling, make sure we force a link transition if - * we have a UP/DOWN/UP transition - */ - if (phydev->is_pseudo_fixed_link) - phydev->irq = PHY_POLL; + phylink = phylink_create(&priv->phylink_config, fwnode, + mode, &stmmac_phylink_mac_ops); + if (IS_ERR(phylink)) + return PTR_ERR(phylink); - phy_attached_info(phydev); + priv->phylink = phylink; return 0; } @@ -1192,26 +1197,14 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, int i, gfp_t flags, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - struct sk_buff *skb; + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags); - if (!skb) { - netdev_err(priv->dev, - "%s: Rx init fails; skb is NULL\n", __func__); + buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->page) return -ENOMEM; - } - rx_q->rx_skbuff[i] = skb; - rx_q->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data, - priv->dma_buf_sz, - DMA_FROM_DEVICE); - if (dma_mapping_error(priv->device, rx_q->rx_skbuff_dma[i])) { - netdev_err(priv->dev, "%s: DMA mapping error\n", __func__); - dev_kfree_skb_any(skb); - return -EINVAL; - } - - stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[i]); + buf->addr = page_pool_get_dma_addr(buf->page); + stmmac_set_desc_addr(priv, p, buf->addr); if (priv->dma_buf_sz == BUF_SIZE_16KiB) stmmac_init_desc3(priv, p); @@ -1227,13 +1220,11 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - if 
(rx_q->rx_skbuff[i]) { - dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[i], - priv->dma_buf_sz, DMA_FROM_DEVICE); - dev_kfree_skb_any(rx_q->rx_skbuff[i]); - } - rx_q->rx_skbuff[i] = NULL; + if (buf->page) + page_pool_put_page(rx_q->page_pool, buf->page, false); + buf->page = NULL; } /** @@ -1316,10 +1307,6 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) queue); if (ret) goto err_init_rx_buffers; - - netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n", - rx_q->rx_skbuff[i], rx_q->rx_skbuff[i]->data, - (unsigned int)rx_q->rx_skbuff_dma[i]); } rx_q->cur_rx = 0; @@ -1493,8 +1480,11 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv) sizeof(struct dma_extended_desc), rx_q->dma_erx, rx_q->dma_rx_phy); - kfree(rx_q->rx_skbuff_dma); - kfree(rx_q->rx_skbuff); + kfree(rx_q->buf_pool); + if (rx_q->page_pool) { + page_pool_request_shutdown(rx_q->page_pool); + page_pool_destroy(rx_q->page_pool); + } } } @@ -1546,20 +1536,29 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) /* RX queues buffers and DMA */ for (queue = 0; queue < rx_count; queue++) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct page_pool_params pp_params = { 0 }; rx_q->queue_index = queue; rx_q->priv_data = priv; - rx_q->rx_skbuff_dma = kmalloc_array(DMA_RX_SIZE, - sizeof(dma_addr_t), - GFP_KERNEL); - if (!rx_q->rx_skbuff_dma) + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = DMA_RX_SIZE; + pp_params.order = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE); + pp_params.nid = dev_to_node(priv->device); + pp_params.dev = priv->device; + pp_params.dma_dir = DMA_FROM_DEVICE; + + rx_q->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rx_q->page_pool)) { + ret = PTR_ERR(rx_q->page_pool); + rx_q->page_pool = NULL; goto err_dma; + } - rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE, - sizeof(struct sk_buff *), - GFP_KERNEL); - if (!rx_q->rx_skbuff) + rx_q->buf_pool = kmalloc_array(DMA_RX_SIZE, + sizeof(*rx_q->buf_pool), + GFP_KERNEL); + if (!rx_q->buf_pool) goto err_dma; if (priv->extend_desc) { @@ -2049,14 +2048,15 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) struct stmmac_channel *ch = &priv->channel[chan]; if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); - napi_schedule_irqoff(&ch->rx_napi); + if (napi_schedule_prep(&ch->rx_napi)) { + stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + __napi_schedule_irqoff(&ch->rx_napi); + status |= handle_tx; + } } - if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) napi_schedule_irqoff(&ch->tx_napi); - } return status; } @@ -2118,10 +2118,10 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET | MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET; - dwmac_mmc_intr_all_mask(priv->mmcaddr); + stmmac_mmc_intr_all_mask(priv, priv->mmcaddr); if (priv->dma_cap.rmon) { - dwmac_mmc_ctrl(priv->mmcaddr, mode); + stmmac_mmc_ctrl(priv, priv->mmcaddr, mode); memset(&priv->mmc, 0, sizeof(struct stmmac_counters)); } else netdev_info(priv->dev, "No MAC Management Counters available\n"); @@ -2154,8 +2154,8 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv) stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0); if (!is_valid_ether_addr(priv->dev->dev_addr)) eth_hw_addr_random(priv->dev); - netdev_info(priv->dev, 
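
The interrupt path above also picks up the napi_schedule_prep() idiom: only the context that wins NAPI_STATE_SCHED masks the channel's DMA interrupt, so the mask cannot race with a poller that is concurrently completing and re-enabling it. The hunk also folds status |= handle_tx into the winning branch, apparently so TX cleanup is scheduled alongside while the shared channel interrupt stays masked. The shape of the idiom, isolated:

/* only the schedule winner touches the IRQ mask */
if (napi_schedule_prep(&ch->rx_napi)) {
	stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
	__napi_schedule_irqoff(&ch->rx_napi);
}
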
"device MAC address %pM\n", - priv->dev->dev_addr); + dev_info(priv->device, "device MAC address %pM\n", + priv->dev->dev_addr); } } @@ -2262,20 +2262,21 @@ static void stmmac_tx_timer(struct timer_list *t) } /** - * stmmac_init_tx_coalesce - init tx mitigation options. + * stmmac_init_coalesce - init mitigation options. * @priv: driver private structure * Description: - * This inits the transmit coalesce parameters: i.e. timer rate, + * This inits the coalesce parameters: i.e. timer rate, * timer handler and default threshold used for enabling the * interrupt on completion bit. */ -static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) +static void stmmac_init_coalesce(struct stmmac_priv *priv) { u32 tx_channel_count = priv->plat->tx_queues_to_use; u32 chan; priv->tx_coal_frames = STMMAC_TX_FRAMES; priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; + priv->rx_coal_frames = STMMAC_RX_FRAMES; for (chan = 0; chan < tx_channel_count; chan++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; @@ -2561,9 +2562,9 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; if (priv->use_riwt) { - ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt); + ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt); if (!ret) - priv->rx_riwt = MAX_DMA_RIWT; + priv->rx_riwt = MIN_DMA_RIWT; } if (priv->hw->pcs) @@ -2645,10 +2646,9 @@ static int stmmac_open(struct net_device *dev) goto init_error; } - stmmac_init_tx_coalesce(priv); + stmmac_init_coalesce(priv); - if (dev->phydev) - phy_start(dev->phydev); + phylink_start(priv->phylink); /* Request the IRQ lines */ ret = request_irq(dev->irq, stmmac_interrupt, @@ -2695,8 +2695,7 @@ lpiirq_error: wolirq_error: free_irq(dev->irq, dev); irq_error: - if (dev->phydev) - phy_stop(dev->phydev); + phylink_stop(priv->phylink); for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) del_timer_sync(&priv->tx_queue[chan].txtimer); @@ -2705,9 +2704,7 @@ irq_error: init_error: free_dma_desc_resources(priv); dma_desc_error: - if (dev->phydev) - phy_disconnect(dev->phydev); - + phylink_disconnect_phy(priv->phylink); return ret; } @@ -2726,10 +2723,8 @@ static int stmmac_release(struct net_device *dev) del_timer_sync(&priv->eee_ctrl_timer); /* Stop and disconnect the PHY */ - if (dev->phydev) { - phy_stop(dev->phydev); - phy_disconnect(dev->phydev); - } + phylink_stop(priv->phylink); + phylink_disconnect_phy(priv->phylink); stmmac_stop_all_queues(priv); @@ -2772,7 +2767,7 @@ static int stmmac_release(struct net_device *dev) * This function fills descriptor and request new descriptors according to * buffer length to fill */ -static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des, +static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, int total_len, bool last_segment, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; @@ -2783,11 +2778,18 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des, tmp_len = total_len; while (tmp_len > 0) { + dma_addr_t curr_addr; + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); desc = tx_q->dma_tx + tx_q->cur_tx; - desc->des0 = cpu_to_le32(des + (total_len - tmp_len)); + curr_addr = des + (total_len - tmp_len); + if (priv->dma_cap.addr64 <= 32) + desc->des0 = cpu_to_le32(curr_addr); + else + stmmac_set_desc_addr(priv, desc, curr_addr); + buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ? 
TSO_MAX_BUFF_SIZE : tmp_len; @@ -2833,11 +2835,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); - unsigned int first_entry, des; + unsigned int first_entry; struct stmmac_tx_queue *tx_q; int tmp_pay_len = 0; u32 pay_len, mss; u8 proto_hdr_len; + dma_addr_t des; int i; tx_q = &priv->tx_queue[queue]; @@ -2894,14 +2897,19 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[first_entry].buf = des; tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb); - first->des0 = cpu_to_le32(des); + if (priv->dma_cap.addr64 <= 32) { + first->des0 = cpu_to_le32(des); - /* Fill start of payload in buff2 of first descriptor */ - if (pay_len) - first->des1 = cpu_to_le32(des + proto_hdr_len); + /* Fill start of payload in buff2 of first descriptor */ + if (pay_len) + first->des1 = cpu_to_le32(des + proto_hdr_len); - /* If needed take extra descriptors to fill the remaining payload */ - tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE; + /* If needed take extra descriptors to fill the remaining payload */ + tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE; + } else { + stmmac_set_desc_addr(priv, first, des); + tmp_pay_len = pay_len; + } stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue); @@ -3031,12 +3039,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int i, csum_insertion = 0, is_jumbo = 0; u32 queue = skb_get_queue_mapping(skb); int nfrags = skb_shinfo(skb)->nr_frags; - int entry; - unsigned int first_entry; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; + unsigned int first_entry; unsigned int enh_desc; - unsigned int des; + dma_addr_t des; + int entry; tx_q = &priv->tx_queue[queue]; @@ -3045,17 +3053,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { - /* - * There is no way to determine the number of TSO - * capable Queues. Let's use always the Queue 0 - * because if TSO is supported then at least this - * one will be capable. - */ - skb_set_queue_mapping(skb, 0); - + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) return stmmac_tso_xmit(skb, dev); - } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { @@ -3281,59 +3280,38 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) int dirty = stmmac_rx_dirty(priv, queue); unsigned int entry = rx_q->dirty_rx; - int bfsize = priv->dma_buf_sz; - while (dirty-- > 0) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; struct dma_desc *p; + bool use_rx_wd; if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else p = rx_q->dma_rx + entry; - if (likely(!rx_q->rx_skbuff[entry])) { - struct sk_buff *skb; - - skb = netdev_alloc_skb_ip_align(priv->dev, bfsize); - if (unlikely(!skb)) { - /* so for a while no zero-copy! 
*/ - rx_q->rx_zeroc_thresh = STMMAC_RX_THRESH; - if (unlikely(net_ratelimit())) - dev_err(priv->device, - "fail to alloc skb entry %d\n", - entry); + if (!buf->page) { + buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->page) break; - } - - rx_q->rx_skbuff[entry] = skb; - rx_q->rx_skbuff_dma[entry] = - dma_map_single(priv->device, skb->data, bfsize, - DMA_FROM_DEVICE); - if (dma_mapping_error(priv->device, - rx_q->rx_skbuff_dma[entry])) { - netdev_err(priv->dev, "Rx DMA map failed\n"); - dev_kfree_skb(skb); - break; - } - - stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[entry]); - stmmac_refill_desc3(priv, rx_q, p); - - if (rx_q->rx_zeroc_thresh > 0) - rx_q->rx_zeroc_thresh--; - - netif_dbg(priv, rx_status, priv->dev, - "refill entry #%d\n", entry); } - dma_wmb(); - stmmac_set_rx_owner(priv, p, priv->use_riwt); + buf->addr = page_pool_get_dma_addr(buf->page); + stmmac_set_desc_addr(priv, p, buf->addr); + stmmac_refill_desc3(priv, rx_q, p); + + rx_q->rx_count_frames++; + rx_q->rx_count_frames %= priv->rx_coal_frames; + use_rx_wd = priv->use_riwt && rx_q->rx_count_frames; dma_wmb(); + stmmac_set_rx_owner(priv, p, use_rx_wd); entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE); } rx_q->dirty_rx = entry; + rx_q->rx_tail_addr = rx_q->dma_rx_phy + + (rx_q->dirty_rx * sizeof(struct dma_desc)); stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); } @@ -3352,9 +3330,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) unsigned int next_entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; unsigned int count = 0; - bool xmac; - - xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; if (netif_msg_rx_status(priv)) { void *rx_head; @@ -3368,11 +3343,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); } while (count < limit) { + struct stmmac_rx_buffer *buf; + struct dma_desc *np, *p; int entry, status; - struct dma_desc *p; - struct dma_desc *np; entry = next_entry; + buf = &rx_q->buf_pool[entry]; if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); @@ -3402,20 +3378,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_rx_extended_status(priv, &priv->dev->stats, &priv->xstats, rx_q->dma_erx + entry); if (unlikely(status == discard_frame)) { + page_pool_recycle_direct(rx_q->page_pool, buf->page); priv->dev->stats.rx_errors++; - if (priv->hwts_rx_en && !priv->extend_desc) { - /* DESC2 & DESC3 will be overwritten by device - * with timestamp value, hence reinitialize - * them in stmmac_rx_refill() function so that - * device can reuse it. - */ - dev_kfree_skb_any(rx_q->rx_skbuff[entry]); - rx_q->rx_skbuff[entry] = NULL; - dma_unmap_single(priv->device, - rx_q->rx_skbuff_dma[entry], - priv->dma_buf_sz, - DMA_FROM_DEVICE); - } + buf->page = NULL; } else { struct sk_buff *skb; int frame_len; @@ -3455,58 +3420,20 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) frame_len, status); } - /* The zero-copy is always used for all the sizes - * in case of GMAC4 because it needs - * to refill the used descriptors, always. 
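
rx_count_frames layers RX frame-count coalescing on top of the existing RIWT watchdog: assuming stmmac_set_rx_owner()'s third argument suppresses the per-descriptor interrupt-on-completion bit, the refill loop leaves IOC set only on every rx_coal_frames-th descriptor, so the counter caps the IRQ rate while the watchdog timer backstops latency. The decision, isolated with comments:

rx_q->rx_count_frames++;
rx_q->rx_count_frames %= priv->rx_coal_frames;	/* wraps to 0 periodically */

/* nonzero = suppress IOC: at most one RX IRQ per rx_coal_frames frames */
use_rx_wd = priv->use_riwt && rx_q->rx_count_frames;
stmmac_set_rx_owner(priv, p, use_rx_wd);
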
- */ - if (unlikely(!xmac && - ((frame_len < priv->rx_copybreak) || - stmmac_rx_threshold_count(rx_q)))) { - skb = netdev_alloc_skb_ip_align(priv->dev, - frame_len); - if (unlikely(!skb)) { - if (net_ratelimit()) - dev_warn(priv->device, - "packet dropped\n"); - priv->dev->stats.rx_dropped++; - continue; - } - - dma_sync_single_for_cpu(priv->device, - rx_q->rx_skbuff_dma - [entry], frame_len, - DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, - rx_q-> - rx_skbuff[entry]->data, - frame_len); - - skb_put(skb, frame_len); - dma_sync_single_for_device(priv->device, - rx_q->rx_skbuff_dma - [entry], frame_len, - DMA_FROM_DEVICE); - } else { - skb = rx_q->rx_skbuff[entry]; - if (unlikely(!skb)) { - if (net_ratelimit()) - netdev_err(priv->dev, - "%s: Inconsistent Rx chain\n", - priv->dev->name); - priv->dev->stats.rx_dropped++; - continue; - } - prefetch(skb->data - NET_IP_ALIGN); - rx_q->rx_skbuff[entry] = NULL; - rx_q->rx_zeroc_thresh++; - - skb_put(skb, frame_len); - dma_unmap_single(priv->device, - rx_q->rx_skbuff_dma[entry], - priv->dma_buf_sz, - DMA_FROM_DEVICE); + skb = netdev_alloc_skb_ip_align(priv->dev, frame_len); + if (unlikely(!skb)) { + priv->dev->stats.rx_dropped++; + continue; } + dma_sync_single_for_cpu(priv->device, buf->addr, + frame_len, DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, page_address(buf->page), + frame_len); + skb_put(skb, frame_len); + dma_sync_single_for_device(priv->device, buf->addr, + frame_len, DMA_FROM_DEVICE); + if (netif_msg_pktdata(priv)) { netdev_dbg(priv->dev, "frame received (%dbytes)", frame_len); @@ -3526,6 +3453,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) napi_gro_receive(&ch->rx_napi, skb); + /* Data payload copied into SKB, page ready for recycle */ + page_pool_recycle_direct(rx_q->page_pool, buf->page); + buf->page = NULL; + priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } @@ -3568,8 +3499,8 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); work_done = min(work_done, budget); - if (work_done < budget && napi_complete_done(napi, work_done)) - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + if (work_done < budget) + napi_complete_done(napi, work_done); /* Force transmission restart */ tx_q = &priv->tx_queue[chan]; @@ -3792,6 +3723,7 @@ static void stmmac_poll_controller(struct net_device *dev) */ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct stmmac_priv *priv = netdev_priv (dev); int ret = -EOPNOTSUPP; if (!netif_running(dev)) @@ -3801,9 +3733,7 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) - return -EINVAL; - ret = phy_mii_ioctl(dev->phydev, rq, cmd); + ret = phylink_mii_ioctl(priv->phylink, rq, cmd); break; case SIOCSHWTSTAMP: ret = stmmac_hwtstamp_set(dev, rq); @@ -3839,23 +3769,7 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return ret; } -static int stmmac_setup_tc_block(struct stmmac_priv *priv, - struct tc_block_offload *f) -{ - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(stmmac_block_cb_list); 
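
stmmac_block_cb_list plus flow_block_cb_setup_simple() replaces the hand-rolled bind/unbind switch deleted above. The trailing true in the call that follows is the helper's ingress_only argument; inside the core helper it reinstates the binder-type check the driver no longer open-codes, roughly:

/* sketch of the check flow_block_cb_setup_simple() performs */
if (ingress_only &&
    f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
	return -EOPNOTSUPP;
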
static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data) @@ -3864,7 +3778,10 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: - return stmmac_setup_tc_block(priv, type_data); + return flow_block_cb_setup_simple(type_data, + &stmmac_block_cb_list, + stmmac_setup_tc_block_cb, + priv, priv, true); case TC_SETUP_QDISC_CBS: return stmmac_tc_setup_cbs(priv, priv, type_data); default: @@ -3872,6 +3789,22 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, } } +static u16 stmmac_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + return 0; + } + + return netdev_pick_tx(dev, skb, NULL) % dev->real_num_tx_queues; +} + static int stmmac_set_mac_address(struct net_device *ndev, void *addr) { struct stmmac_priv *priv = netdev_priv(ndev); @@ -4088,6 +4021,7 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_tx_timeout = stmmac_tx_timeout, .ndo_do_ioctl = stmmac_ioctl, .ndo_setup_tc = stmmac_setup_tc, + .ndo_select_queue = stmmac_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif @@ -4160,6 +4094,12 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->enh_desc = priv->dma_cap.enh_desc; priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up; priv->hw->pmt = priv->plat->pmt; + if (priv->dma_cap.hash_tb_sz) { + priv->hw->multicast_filter_bins = + (BIT(priv->dma_cap.hash_tb_sz) << 5); + priv->hw->mcast_bits_log2 = + ilog2(priv->hw->multicast_filter_bins); + } /* TXCOE doesn't work in thresh DMA mode */ if (priv->plat->force_thresh_dma_mode) @@ -4237,9 +4177,8 @@ int stmmac_dvr_probe(struct device *device, u32 queue, maxq; int ret = 0; - ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv), - MTL_MAX_TX_QUEUES, - MTL_MAX_RX_QUEUES); + ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv), + MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES); if (!ndev) return -ENOMEM; @@ -4271,8 +4210,7 @@ int stmmac_dvr_probe(struct device *device, priv->wq = create_singlethread_workqueue("stmmac_wq"); if (!priv->wq) { dev_err(priv->device, "failed to create workqueue\n"); - ret = -ENOMEM; - goto error_wq; + return -ENOMEM; } INIT_WORK(&priv->service_task, stmmac_service_task); @@ -4319,6 +4257,24 @@ int stmmac_dvr_probe(struct device *device, priv->tso = true; dev_info(priv->device, "TSO feature enabled\n"); } + + if (priv->dma_cap.addr64) { + ret = dma_set_mask_and_coherent(device, + DMA_BIT_MASK(priv->dma_cap.addr64)); + if (!ret) { + dev_info(priv->device, "Using %d bits DMA width\n", + priv->dma_cap.addr64); + } else { + ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32)); + if (ret) { + dev_err(priv->device, "Failed to set DMA Mask\n"); + goto error_hw_init; + } + + priv->dma_cap.addr64 = 32; + } + } + ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA; ndev->watchdog_timeo = msecs_to_jiffies(watchdog); #ifdef STMMAC_VLAN_TAG_USED @@ -4396,6 +4352,12 @@ int stmmac_dvr_probe(struct device *device, } } + ret = stmmac_phy_setup(priv); + if (ret) { + netdev_err(ndev, "failed to setup phy (%d)\n", ret); + goto error_phy_setup; + } + ret = register_netdev(ndev); if (ret) { dev_err(priv->device, "%s: ERROR %i registering the 
device\n", @@ -4413,6 +4375,8 @@ int stmmac_dvr_probe(struct device *device, return ret; error_netdev_register: + phylink_destroy(priv->phylink); +error_phy_setup: if (priv->hw->pcs != STMMAC_PCS_RGMII && priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) @@ -4428,8 +4392,6 @@ error_mdio_register: } error_hw_init: destroy_workqueue(priv->wq); -error_wq: - free_netdev(ndev); return ret; } @@ -4456,6 +4418,7 @@ int stmmac_dvr_remove(struct device *dev) stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); + phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); clk_disable_unprepare(priv->plat->pclk); @@ -4466,7 +4429,6 @@ int stmmac_dvr_remove(struct device *dev) stmmac_mdio_unregister(ndev); destroy_workqueue(priv->wq); mutex_destroy(&priv->lock); - free_netdev(ndev); return 0; } @@ -4487,8 +4449,7 @@ int stmmac_suspend(struct device *dev) if (!ndev || !netif_running(ndev)) return 0; - if (ndev->phydev) - phy_stop(ndev->phydev); + phylink_stop(priv->phylink); mutex_lock(&priv->lock); @@ -4513,9 +4474,7 @@ int stmmac_suspend(struct device *dev) } mutex_unlock(&priv->lock); - priv->oldlink = false; priv->speed = SPEED_UNKNOWN; - priv->oldduplex = DUPLEX_UNKNOWN; return 0; } EXPORT_SYMBOL_GPL(stmmac_suspend); @@ -4590,7 +4549,7 @@ int stmmac_resume(struct device *dev) stmmac_clear_descriptors(priv); stmmac_hw_setup(ndev, false); - stmmac_init_tx_coalesce(priv); + stmmac_init_coalesce(priv); stmmac_set_rx_mode(ndev); stmmac_enable_all_queues(priv); @@ -4599,8 +4558,7 @@ int stmmac_resume(struct device *dev) mutex_unlock(&priv->lock); - if (ndev->phydev) - phy_start(ndev->phydev); + phylink_start(priv->phylink); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 1341bb5f693c..4304c1abc5d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -10,13 +10,13 @@ Maintainer: Giuseppe Cavallaro <peppe.cavallaro@st.com> *******************************************************************************/ +#include <linux/gpio/consumer.h> #include <linux/io.h> #include <linux/iopoll.h> #include <linux/mii.h> -#include <linux/of.h> -#include <linux/of_gpio.h> #include <linux/of_mdio.h> #include <linux/phy.h> +#include <linux/property.h> #include <linux/slab.h> #include "dwxgmac2.h" @@ -24,11 +24,14 @@ #define MII_BUSY 0x00000001 #define MII_WRITE 0x00000002 +#define MII_DATA_MASK GENMASK(15, 0) /* GMAC4 defines */ #define MII_GMAC4_GOC_SHIFT 2 +#define MII_GMAC4_REG_ADDR_SHIFT 16 #define MII_GMAC4_WRITE (1 << MII_GMAC4_GOC_SHIFT) #define MII_GMAC4_READ (3 << MII_GMAC4_GOC_SHIFT) +#define MII_GMAC4_C45E BIT(1) /* XGMAC defines */ #define MII_XGMAC_SADDR BIT(18) @@ -155,22 +158,34 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 v; - int data; u32 value = MII_BUSY; + int data = 0; + u32 v; value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) & priv->hw->mii.clk_csr_mask; - if (priv->plat->has_gmac4) + if (priv->plat->has_gmac4) { value |= MII_GMAC4_READ; + if (phyreg & MII_ADDR_C45) { + value |= MII_GMAC4_C45E; + value &= 
~priv->hw->mii.reg_mask; + value |= ((phyreg >> MII_DEVADDR_C45_SHIFT) << + priv->hw->mii.reg_shift) & + priv->hw->mii.reg_mask; + + data |= (phyreg & MII_REGADDR_C45_MASK) << + MII_GMAC4_REG_ADDR_SHIFT; + } + } if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), 100, 10000)) return -EBUSY; + writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), @@ -178,7 +193,7 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) return -EBUSY; /* Read the data from the MII data register */ - data = (int)readl(priv->ioaddr + mii_data); + data = (int)readl(priv->ioaddr + mii_data) & MII_DATA_MASK; return data; } @@ -198,8 +213,9 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 v; u32 value = MII_BUSY; + int data = phydata; + u32 v; value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; @@ -207,10 +223,21 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) & priv->hw->mii.clk_csr_mask; - if (priv->plat->has_gmac4) + if (priv->plat->has_gmac4) { value |= MII_GMAC4_WRITE; - else + if (phyreg & MII_ADDR_C45) { + value |= MII_GMAC4_C45E; + value &= ~priv->hw->mii.reg_mask; + value |= ((phyreg >> MII_DEVADDR_C45_SHIFT) << + priv->hw->mii.reg_shift) & + priv->hw->mii.reg_mask; + + data |= (phyreg & MII_REGADDR_C45_MASK) << + MII_GMAC4_REG_ADDR_SHIFT; + } + } else { value |= MII_WRITE; + } /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), @@ -218,7 +245,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, return -EBUSY; /* Set the MII address register to write */ - writel(phydata, priv->ioaddr + mii_data); + writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); /* Wait until any existing MII operation is complete */ @@ -237,51 +264,35 @@ int stmmac_mdio_reset(struct mii_bus *bus) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; - struct stmmac_mdio_bus_data *data = priv->plat->mdio_bus_data; #ifdef CONFIG_OF if (priv->device->of_node) { - if (data->reset_gpio < 0) { - struct device_node *np = priv->device->of_node; + struct gpio_desc *reset_gpio; + u32 delays[3] = { 0, 0, 0 }; - if (!np) - return 0; + reset_gpio = devm_gpiod_get_optional(priv->device, + "snps,reset", + GPIOD_OUT_LOW); + if (IS_ERR(reset_gpio)) + return PTR_ERR(reset_gpio); - data->reset_gpio = of_get_named_gpio(np, - "snps,reset-gpio", 0); - if (data->reset_gpio < 0) - return 0; + device_property_read_u32_array(priv->device, + "snps,reset-delays-us", + delays, ARRAY_SIZE(delays)); - data->active_low = of_property_read_bool(np, - "snps,reset-active-low"); - of_property_read_u32_array(np, - "snps,reset-delays-us", data->delays, 3); + if (delays[0]) + msleep(DIV_ROUND_UP(delays[0], 1000)); - if (devm_gpio_request(priv->device, data->reset_gpio, - "mdio-reset")) - return 0; - } - - gpio_direction_output(data->reset_gpio, - data->active_low ? 
1 : 0); - if (data->delays[0]) - msleep(DIV_ROUND_UP(data->delays[0], 1000)); + gpiod_set_value_cansleep(reset_gpio, 1); + if (delays[1]) + msleep(DIV_ROUND_UP(delays[1], 1000)); - gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1); - if (data->delays[1]) - msleep(DIV_ROUND_UP(data->delays[1], 1000)); - - gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0); - if (data->delays[2]) - msleep(DIV_ROUND_UP(data->delays[2], 1000)); + gpiod_set_value_cansleep(reset_gpio, 0); + if (delays[2]) + msleep(DIV_ROUND_UP(delays[2], 1000)); } #endif - if (data->phy_reset) { - netdev_dbg(ndev, "stmmac_mdio_reset: calling phy_reset\n"); - data->phy_reset(priv->plat->bsp_priv); - } - /* This is a workaround for problems with the STE101P PHY. * It doesn't complete its reset until at least one clock cycle * on MDC, so perform a dummy mdio read. To be updated for GMAC4 @@ -318,11 +329,6 @@ int stmmac_mdio_register(struct net_device *ndev) if (mdio_bus_data->irqs) memcpy(new_bus->irq, mdio_bus_data->irqs, sizeof(new_bus->irq)); -#ifdef CONFIG_OF - if (priv->device->of_node) - mdio_bus_data->reset_gpio = -1; -#endif - new_bus->name = "stmmac"; if (priv->plat->has_xgmac) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 0bd72739a071..86f9c07a38cf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -63,7 +63,6 @@ static void common_default_data(struct plat_stmmacenet_data *plat) plat->has_gmac = 1; plat->force_sf_dma_mode = 1; - plat->mdio_bus_data->phy_reset = NULL; plat->mdio_bus_data->phy_mask = 0; /* Set default value for multicast hash bins */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0f0f4b31eb7e..73fc2524372e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -323,21 +323,6 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, {}, }; - /* If phy-handle property is passed from DT, use it as the PHY */ - plat->phy_node = of_parse_phandle(np, "phy-handle", 0); - if (plat->phy_node) - dev_dbg(dev, "Found phy-handle subnode\n"); - - /* If phy-handle is not specified, check if we have a fixed-phy */ - if (!plat->phy_node && of_phy_is_fixed_link(np)) { - if ((of_phy_register_fixed_link(np) < 0)) - return -ENODEV; - - dev_dbg(dev, "Found fixed-link subnode\n"); - plat->phy_node = of_node_get(np); - mdio = false; - } - if (of_match_node(need_mdio_ids, np)) { plat->mdio_node = of_get_child_by_name(np, "mdio"); } else { @@ -387,6 +372,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) *mac = of_get_mac_address(np); plat->interface = of_get_phy_mode(np); + /* Some wrapper drivers still rely on phy_node. Let's save it while + * they are not converted to phylink. 
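*
* As an editor's illustration only (not part of this patch; it assumes
* the phylink API of this kernel generation and the stmmac names
* priv->phylink_config / stmmac_phylink_mac_ops), the handoff to
* phylink looks roughly like:
*
*	struct phylink *phylink;
*
*	phylink = phylink_create(&priv->phylink_config,
*				 of_fwnode_handle(priv->plat->phylink_node),
*				 priv->plat->interface,
*				 &stmmac_phylink_mac_ops);
*	if (IS_ERR(phylink))
*		return PTR_ERR(phylink);
*	priv->phylink = phylink;
*
* phylink then parses "phy-handle" / "fixed-link" out of phylink_node
* by itself, which is why the fixed-link parsing was dropped above.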
*/ + plat->phy_node = of_parse_phandle(np, "phy-handle", 0); + + /* PHYLINK automatically parses the phy-handle property */ + plat->phylink_node = np; + /* Get max speed of operation from device tree */ if (of_property_read_u32(np, "max-speed", &plat->max_speed)) plat->max_speed = -1; @@ -581,10 +573,6 @@ error_pclk_get: void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { - struct device_node *np = pdev->dev.of_node; - - if (of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); of_node_put(plat->phy_node); of_node_put(plat->mdio_node); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c new file mode 100644 index 000000000000..a97b1ea76438 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -0,0 +1,850 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Synopsys, Inc. and/or its affiliates. + * stmmac Selftests Support + * + * Author: Jose Abreu <joabreu@synopsys.com> + */ + +#include <linux/completion.h> +#include <linux/ethtool.h> +#include <linux/ip.h> +#include <linux/phy.h> +#include <linux/udp.h> +#include <net/tcp.h> +#include <net/udp.h> +#include "stmmac.h" + +struct stmmachdr { + __be32 version; + __be64 magic; + u8 id; +} __packed; + +#define STMMAC_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct stmmachdr)) +#define STMMAC_TEST_PKT_MAGIC 0xdeadcafecafedeadULL +#define STMMAC_LB_TIMEOUT msecs_to_jiffies(200) + +struct stmmac_packet_attrs { + int vlan; + int vlan_id_in; + int vlan_id_out; + unsigned char *src; + unsigned char *dst; + u32 ip_src; + u32 ip_dst; + int tcp; + int sport; + int dport; + u32 exp_hash; + int dont_wait; + int timeout; + int size; + int remove_sa; + u8 id; +}; + +static u8 stmmac_test_next_id; + +static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, + struct stmmac_packet_attrs *attr) +{ + struct sk_buff *skb = NULL; + struct udphdr *uhdr = NULL; + struct tcphdr *thdr = NULL; + struct stmmachdr *shdr; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int iplen, size; + + size = attr->size + STMMAC_TEST_PKT_SIZE; + if (attr->vlan) { + size += 4; + if (attr->vlan > 1) + size += 4; + } + + if (attr->tcp) + size += sizeof(struct tcphdr); + else + size += sizeof(struct udphdr); + + skb = netdev_alloc_skb(priv->dev, size); + if (!skb) + return NULL; + + prefetchw(skb->data); + skb_reserve(skb, NET_IP_ALIGN); + + if (attr->vlan > 1) + ehdr = skb_push(skb, ETH_HLEN + 8); + else if (attr->vlan) + ehdr = skb_push(skb, ETH_HLEN + 4); + else if (attr->remove_sa) + ehdr = skb_push(skb, ETH_HLEN - 6); + else + ehdr = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + skb_set_network_header(skb, skb->len); + ihdr = skb_put(skb, sizeof(*ihdr)); + + skb_set_transport_header(skb, skb->len); + if (attr->tcp) + thdr = skb_put(skb, sizeof(*thdr)); + else + uhdr = skb_put(skb, sizeof(*uhdr)); + + if (!attr->remove_sa) + eth_zero_addr(ehdr->h_source); + eth_zero_addr(ehdr->h_dest); + if (attr->src && !attr->remove_sa) + ether_addr_copy(ehdr->h_source, attr->src); + if (attr->dst) + ether_addr_copy(ehdr->h_dest, attr->dst); + + if (!attr->remove_sa) { + ehdr->h_proto = htons(ETH_P_IP); + } else { + __be16 *ptr = (__be16 *)ehdr; + + /* HACK */ + ptr[3] = htons(ETH_P_IP); + } + + if (attr->vlan) { + __be16 *tag, *proto; + + if (!attr->remove_sa) { + tag = (void *)ehdr + ETH_HLEN; + proto = (void *)ehdr + (2 * ETH_ALEN); + } else { + tag = (void *)ehdr + ETH_HLEN 
- 6; + proto = (void *)ehdr + ETH_ALEN; + } + + proto[0] = htons(ETH_P_8021Q); + tag[0] = htons(attr->vlan_id_out); + tag[1] = htons(ETH_P_IP); + if (attr->vlan > 1) { + proto[0] = htons(ETH_P_8021AD); + tag[1] = htons(ETH_P_8021Q); + tag[2] = htons(attr->vlan_id_in); + tag[3] = htons(ETH_P_IP); + } + } + + if (attr->tcp) { + thdr->source = htons(attr->sport); + thdr->dest = htons(attr->dport); + thdr->doff = sizeof(struct tcphdr) / 4; + thdr->check = 0; + } else { + uhdr->source = htons(attr->sport); + uhdr->dest = htons(attr->dport); + uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size); + uhdr->check = 0; + } + + ihdr->ihl = 5; + ihdr->ttl = 32; + ihdr->version = 4; + if (attr->tcp) + ihdr->protocol = IPPROTO_TCP; + else + ihdr->protocol = IPPROTO_UDP; + iplen = sizeof(*ihdr) + sizeof(*shdr) + attr->size; + if (attr->tcp) + iplen += sizeof(*thdr); + else + iplen += sizeof(*uhdr); + ihdr->tot_len = htons(iplen); + ihdr->frag_off = 0; + ihdr->saddr = 0; + ihdr->daddr = htonl(attr->ip_dst); + ihdr->tos = 0; + ihdr->id = 0; + ip_send_check(ihdr); + + shdr = skb_put(skb, sizeof(*shdr)); + shdr->version = 0; + shdr->magic = cpu_to_be64(STMMAC_TEST_PKT_MAGIC); + attr->id = stmmac_test_next_id; + shdr->id = stmmac_test_next_id++; + + if (attr->size) + skb_put(skb, attr->size); + + skb->csum = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + if (attr->tcp) { + thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, ihdr->daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + } else { + udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr); + } + + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = PACKET_HOST; + skb->dev = priv->dev; + + return skb; +} + +struct stmmac_test_priv { + struct stmmac_packet_attrs *packet; + struct packet_type pt; + struct completion comp; + int double_vlan; + int vlan_id; + int ok; +}; + +static int stmmac_test_loopback_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct stmmac_test_priv *tpriv = pt->af_packet_priv; + struct stmmachdr *shdr; + struct ethhdr *ehdr; + struct udphdr *uhdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (skb_linearize(skb)) + goto out; + if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN)) + goto out; + + ehdr = (struct ethhdr *)skb_mac_header(skb); + if (tpriv->packet->dst) { + if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst)) + goto out; + } + if (tpriv->packet->src) { + if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr)) + goto out; + } + + ihdr = ip_hdr(skb); + if (tpriv->double_vlan) + ihdr = (struct iphdr *)(skb_network_header(skb) + 4); + + if (tpriv->packet->tcp) { + if (ihdr->protocol != IPPROTO_TCP) + goto out; + + thdr = (struct tcphdr *)((u8 *)ihdr + 4 * ihdr->ihl); + if (thdr->dest != htons(tpriv->packet->dport)) + goto out; + + shdr = (struct stmmachdr *)((u8 *)thdr + sizeof(*thdr)); + } else { + if (ihdr->protocol != IPPROTO_UDP) + goto out; + + uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl); + if (uhdr->dest != htons(tpriv->packet->dport)) + goto out; + + shdr = (struct stmmachdr *)((u8 *)uhdr + sizeof(*uhdr)); + } + + if (shdr->magic != cpu_to_be64(STMMAC_TEST_PKT_MAGIC)) + goto out; + if (tpriv->packet->exp_hash && !skb->hash) + goto out; + if (tpriv->packet->id != shdr->id) + goto out; + + tpriv->ok = true; + complete(&tpriv->comp); +out: + kfree_skb(skb); + return 0; +} + +static int 
__stmmac_test_loopback(struct stmmac_priv *priv, + struct stmmac_packet_attrs *attr) +{ + struct stmmac_test_priv *tpriv; + struct sk_buff *skb = NULL; + int ret = 0; + + tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); + if (!tpriv) + return -ENOMEM; + + tpriv->ok = false; + init_completion(&tpriv->comp); + + tpriv->pt.type = htons(ETH_P_IP); + tpriv->pt.func = stmmac_test_loopback_validate; + tpriv->pt.dev = priv->dev; + tpriv->pt.af_packet_priv = tpriv; + tpriv->packet = attr; + dev_add_pack(&tpriv->pt); + + skb = stmmac_test_get_udp_skb(priv, attr); + if (!skb) { + ret = -ENOMEM; + goto cleanup; + } + + skb_set_queue_mapping(skb, 0); + ret = dev_queue_xmit(skb); + if (ret) + goto cleanup; + + if (attr->dont_wait) + goto cleanup; + + if (!attr->timeout) + attr->timeout = STMMAC_LB_TIMEOUT; + + wait_for_completion_timeout(&tpriv->comp, attr->timeout); + ret = !tpriv->ok; + +cleanup: + dev_remove_pack(&tpriv->pt); + kfree(tpriv); + return ret; +} + +static int stmmac_test_mac_loopback(struct stmmac_priv *priv) +{ + struct stmmac_packet_attrs attr = { }; + + attr.dst = priv->dev->dev_addr; + return __stmmac_test_loopback(priv, &attr); +} + +static int stmmac_test_phy_loopback(struct stmmac_priv *priv) +{ + struct stmmac_packet_attrs attr = { }; + int ret; + + if (!priv->dev->phydev) + return -EBUSY; + + ret = phy_loopback(priv->dev->phydev, true); + if (ret) + return ret; + + attr.dst = priv->dev->dev_addr; + ret = __stmmac_test_loopback(priv, &attr); + + phy_loopback(priv->dev->phydev, false); + return ret; +} + +static int stmmac_test_mmc(struct stmmac_priv *priv) +{ + struct stmmac_counters initial, final; + int ret; + + memset(&initial, 0, sizeof(initial)); + memset(&final, 0, sizeof(final)); + + if (!priv->dma_cap.rmon) + return -EOPNOTSUPP; + + /* Save previous results into internal struct */ + stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc); + + ret = stmmac_test_mac_loopback(priv); + if (ret) + return ret; + + /* These will be loopback results so no need to save them */ + stmmac_mmc_read(priv, priv->mmcaddr, &final); + + /* + * The number of MMC counters available depends on HW configuration + * so we just use this one to validate the feature. I hope there is + * not a version without this counter. 
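*
* As a worked illustration (editor's, with made-up numbers): looping
* back a single frame should advance the good-TX frame counter, e.g.
*
*	initial.mmc_tx_framecount_g == 1024
*	final.mmc_tx_framecount_g   == 1025
*
* and the check below returns -EINVAL whenever the counter did not
* move.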
+ */ + if (final.mmc_tx_framecount_g <= initial.mmc_tx_framecount_g) + return -EINVAL; + + return 0; +} + +static int stmmac_test_eee(struct stmmac_priv *priv) +{ + struct stmmac_extra_stats *initial, *final; + int retries = 10; + int ret; + + if (!priv->dma_cap.eee || !priv->eee_active) + return -EOPNOTSUPP; + + initial = kzalloc(sizeof(*initial), GFP_KERNEL); + if (!initial) + return -ENOMEM; + + final = kzalloc(sizeof(*final), GFP_KERNEL); + if (!final) { + ret = -ENOMEM; + goto out_free_initial; + } + + memcpy(initial, &priv->xstats, sizeof(*initial)); + + ret = stmmac_test_mac_loopback(priv); + if (ret) + goto out_free_final; + + /* We have no traffic in the line so, sooner or later it will go LPI */ + while (--retries) { + memcpy(final, &priv->xstats, sizeof(*final)); + + if (final->irq_tx_path_in_lpi_mode_n > + initial->irq_tx_path_in_lpi_mode_n) + break; + msleep(100); + } + + if (!retries) { + ret = -ETIMEDOUT; + goto out_free_final; + } + + if (final->irq_tx_path_in_lpi_mode_n <= + initial->irq_tx_path_in_lpi_mode_n) { + ret = -EINVAL; + goto out_free_final; + } + + if (final->irq_tx_path_exit_lpi_mode_n <= + initial->irq_tx_path_exit_lpi_mode_n) { + ret = -EINVAL; + goto out_free_final; + } + +out_free_final: + kfree(final); +out_free_initial: + kfree(initial); + return ret; +} + +static int stmmac_filter_check(struct stmmac_priv *priv) +{ + if (!(priv->dev->flags & IFF_PROMISC)) + return 0; + + netdev_warn(priv->dev, "Test can't be run in promiscuous mode!\n"); + return -EOPNOTSUPP; +} + +static int stmmac_test_hfilt(struct stmmac_priv *priv) +{ + unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd}; + unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb}; + struct stmmac_packet_attrs attr = { }; + int ret; + + ret = stmmac_filter_check(priv); + if (ret) + return ret; + + ret = dev_mc_add(priv->dev, gd_addr); + if (ret) + return ret; + + attr.dst = gd_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = bd_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_mc_del(priv->dev, gd_addr); + return ret; +} + +static int stmmac_test_pfilt(struct stmmac_priv *priv) +{ + unsigned char gd_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char bd_addr[ETH_ALEN] = {0x08, 0x00, 0x22, 0x33, 0x44, 0x55}; + struct stmmac_packet_attrs attr = { }; + int ret; + + if (stmmac_filter_check(priv)) + return -EOPNOTSUPP; + + ret = dev_uc_add(priv->dev, gd_addr); + if (ret) + return ret; + + attr.dst = gd_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = bd_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_uc_del(priv->dev, gd_addr); + return ret; +} + +static int stmmac_dummy_sync(struct net_device *netdev, const u8 *addr) +{ + return 0; +} + +static void stmmac_test_set_rx_mode(struct net_device *netdev) +{ + /* As we are in test mode of ethtool we already own the rtnl lock + * so no address will change from user. 
We can just call the + * ndo_set_rx_mode() callback directly */ + if (netdev->netdev_ops->ndo_set_rx_mode) + netdev->netdev_ops->ndo_set_rx_mode(netdev); +} + +static int stmmac_test_mcfilt(struct stmmac_priv *priv) +{ + unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + struct stmmac_packet_attrs attr = { }; + int ret; + + if (stmmac_filter_check(priv)) + return -EOPNOTSUPP; + + /* Remove all MC addresses */ + __dev_mc_unsync(priv->dev, NULL); + stmmac_test_set_rx_mode(priv->dev); + + ret = dev_uc_add(priv->dev, uc_addr); + if (ret) + goto cleanup; + + attr.dst = uc_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = mc_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_uc_del(priv->dev, uc_addr); + __dev_mc_sync(priv->dev, stmmac_dummy_sync, NULL); + stmmac_test_set_rx_mode(priv->dev); + return ret; +} + +static int stmmac_test_ucfilt(struct stmmac_priv *priv) +{ + unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + struct stmmac_packet_attrs attr = { }; + int ret; + + if (stmmac_filter_check(priv)) + return -EOPNOTSUPP; + + /* Remove all UC addresses */ + __dev_uc_unsync(priv->dev, NULL); + stmmac_test_set_rx_mode(priv->dev); + + ret = dev_mc_add(priv->dev, mc_addr); + if (ret) + goto cleanup; + + attr.dst = mc_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = uc_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_mc_del(priv->dev, mc_addr); + __dev_uc_sync(priv->dev, stmmac_dummy_sync, NULL); + stmmac_test_set_rx_mode(priv->dev); + return ret; +} + +static int stmmac_test_flowctrl_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct stmmac_test_priv *tpriv = pt->af_packet_priv; + struct ethhdr *ehdr; + + ehdr = (struct ethhdr *)skb_mac_header(skb); + if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr)) + goto out; + if (ehdr->h_proto != htons(ETH_P_PAUSE)) + goto out; + + tpriv->ok = true; + complete(&tpriv->comp); +out: + kfree_skb(skb); + return 0; +} + +static int stmmac_test_flowctrl(struct stmmac_priv *priv) +{ + unsigned char paddr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x01}; + struct phy_device *phydev = priv->dev->phydev; + u32 rx_cnt = priv->plat->rx_queues_to_use; + struct stmmac_test_priv *tpriv; + unsigned int pkt_count; + int i, ret = 0; + + if (!phydev || !phydev->pause) + return -EOPNOTSUPP; + + tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); + if (!tpriv) + return -ENOMEM; + + tpriv->ok = false; + init_completion(&tpriv->comp); + tpriv->pt.type = htons(ETH_P_PAUSE); + tpriv->pt.func = stmmac_test_flowctrl_validate; + tpriv->pt.dev = priv->dev; + tpriv->pt.af_packet_priv = tpriv; + dev_add_pack(&tpriv->pt); + + /* Compute minimum number of packets to make FIFO full */ + pkt_count = priv->plat->rx_fifo_size; + if (!pkt_count) + pkt_count = priv->dma_cap.rx_fifo_size; + pkt_count /= 1400; + pkt_count *= 2; + + for (i = 0; i < rx_cnt; i++) + stmmac_stop_rx(priv, priv->ioaddr, i); + + ret = dev_set_promiscuity(priv->dev, 1); + if (ret) + goto cleanup; + + ret = dev_mc_add(priv->dev, paddr); + if (ret) + goto cleanup; + + for (i = 
0; i < pkt_count; i++) { + struct stmmac_packet_attrs attr = { }; + + attr.dst = priv->dev->dev_addr; + attr.dont_wait = true; + attr.size = 1400; + + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + if (tpriv->ok) + break; + } + + /* Wait for some time in case RX Watchdog is enabled */ + msleep(200); + + for (i = 0; i < rx_cnt; i++) { + struct stmmac_channel *ch = &priv->channel[i]; + + stmmac_start_rx(priv, priv->ioaddr, i); + local_bh_disable(); + napi_reschedule(&ch->rx_napi); + local_bh_enable(); + } + + wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT); + ret = !tpriv->ok; + +cleanup: + dev_mc_del(priv->dev, paddr); + dev_set_promiscuity(priv->dev, -1); + dev_remove_pack(&tpriv->pt); + kfree(tpriv); + return ret; +} + +#define STMMAC_LOOPBACK_NONE 0 +#define STMMAC_LOOPBACK_MAC 1 +#define STMMAC_LOOPBACK_PHY 2 + +static const struct stmmac_test { + char name[ETH_GSTRING_LEN]; + int lb; + int (*fn)(struct stmmac_priv *priv); +} stmmac_selftests[] = { + { + .name = "MAC Loopback ", + .lb = STMMAC_LOOPBACK_MAC, + .fn = stmmac_test_mac_loopback, + }, { + .name = "PHY Loopback ", + .lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */ + .fn = stmmac_test_phy_loopback, + }, { + .name = "MMC Counters ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_mmc, + }, { + .name = "EEE ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_eee, + }, { + .name = "Hash Filter MC ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_hfilt, + }, { + .name = "Perfect Filter UC ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_pfilt, + }, { + .name = "MC Filter ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_mcfilt, + }, { + .name = "UC Filter ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_ucfilt, + }, { + .name = "Flow Control ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_flowctrl, + }, +}; + +void stmmac_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int count = stmmac_selftest_get_count(priv); + int carrier = netif_carrier_ok(dev); + int i, ret; + + memset(buf, 0, sizeof(*buf) * count); + stmmac_test_next_id = 0; + + if (etest->flags != ETH_TEST_FL_OFFLINE) { + netdev_err(priv->dev, "Only offline tests are supported\n"); + etest->flags |= ETH_TEST_FL_FAILED; + return; + } else if (!carrier) { + netdev_err(priv->dev, "You need a valid link to execute tests\n"); + etest->flags |= ETH_TEST_FL_FAILED; + return; + } + + /* We don't want extra traffic */ + netif_carrier_off(dev); + + /* Wait for queues to drain */ + msleep(200); + + for (i = 0; i < count; i++) { + ret = 0; + + switch (stmmac_selftests[i].lb) { + case STMMAC_LOOPBACK_PHY: + ret = -EOPNOTSUPP; + if (dev->phydev) + ret = phy_loopback(dev->phydev, true); + if (!ret) + break; + /* Fallthrough */ + case STMMAC_LOOPBACK_MAC: + ret = stmmac_set_mac_loopback(priv, priv->ioaddr, true); + break; + case STMMAC_LOOPBACK_NONE: + break; + default: + ret = -EOPNOTSUPP; + break; + } + + /* + * First tests will always be MAC / PHY loopback. If any of + * them is not supported we abort earlier. 
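*
* For context, a hedged editor's sketch (not in this hunk; the exact
* callback names are assumptions) of how ethtool reaches this runner
* through the driver's ethtool_ops:
*
*	static const struct ethtool_ops stmmac_ethtool_ops = {
*		...
*		.self_test	= stmmac_selftest_run,
*		.get_strings	= stmmac_get_strings,
*		.get_sset_count	= stmmac_get_sset_count,
*	};
*
* so an offline "ethtool -t" request ends up in stmmac_selftest_run().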
+ */ + if (ret) { + netdev_err(priv->dev, "Loopback is not supported\n"); + etest->flags |= ETH_TEST_FL_FAILED; + break; + } + + ret = stmmac_selftests[i].fn(priv); + if (ret && (ret != -EOPNOTSUPP)) + etest->flags |= ETH_TEST_FL_FAILED; + buf[i] = ret; + + switch (stmmac_selftests[i].lb) { + case STMMAC_LOOPBACK_PHY: + ret = -EOPNOTSUPP; + if (dev->phydev) + ret = phy_loopback(dev->phydev, false); + if (!ret) + break; + /* Fallthrough */ + case STMMAC_LOOPBACK_MAC: + stmmac_set_mac_loopback(priv, priv->ioaddr, false); + break; + default: + break; + } + } + + /* Restart everything */ + if (carrier) + netif_carrier_on(dev); +} + +void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data) +{ + u8 *p = data; + int i; + + for (i = 0; i < stmmac_selftest_get_count(priv); i++) { + snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1, + stmmac_selftests[i].name); + p += ETH_GSTRING_LEN; + } +} + +int stmmac_selftest_get_count(struct stmmac_priv *priv) +{ + return ARRAY_SIZE(stmmac_selftests); +} diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 6f99437a6962..0bc5863bffeb 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -1217,8 +1217,6 @@ static int link_status_1g_rgmii(struct niu *np, int *link_up_p) spin_lock_irqsave(&np->lock, flags); - err = -EINVAL; - err = mii_read(np, np->phy_addr, MII_BMSR); if (err < 0) goto out; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index bd05a977ee7e..834afca3a019 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -50,6 +50,7 @@ config TI_CPSW depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST select TI_DAVINCI_MDIO select MFD_SYSCON + select PAGE_POOL select REGMAP ---help--- This driver supports TI's CPSW Ethernet Switch. @@ -60,6 +61,7 @@ config TI_CPSW config TI_CPTS bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP || COMPILE_TEST + depends on COMMON_CLK depends on POSIX_TIMERS ---help--- This driver supports the Common Platform Time Sync unit of diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 634fc484a0b3..f320f9a0de8b 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -31,6 +31,10 @@ #include <linux/if_vlan.h> #include <linux/kmemleak.h> #include <linux/sys_soc.h> +#include <net/page_pool.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/filter.h> #include <linux/pinctrl/consumer.h> #include <net/pkt_cls.h> @@ -60,6 +64,10 @@ static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT; module_param(descs_pool_size, int, 0444); MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool"); +/* The buf includes headroom compatible with both skb and xdpf */ +#define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN) +#define CPSW_HEADROOM ALIGN(CPSW_HEADROOM_NA, sizeof(long)) + #define for_each_slave(priv, func, arg...) 
\ do { \ struct cpsw_slave *slave; \ @@ -74,6 +82,11 @@ MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool"); (func)(slave++, ##arg); \ } while (0) +#define CPSW_XMETA_OFFSET ALIGN(sizeof(struct xdp_frame), sizeof(long)) + +#define CPSW_XDP_CONSUMED 1 +#define CPSW_XDP_PASS 0 + static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid); @@ -337,24 +350,58 @@ void cpsw_intr_disable(struct cpsw_common *cpsw) return; } +static int cpsw_is_xdpf_handle(void *handle) +{ + return (unsigned long)handle & BIT(0); +} + +static void *cpsw_xdpf_to_handle(struct xdp_frame *xdpf) +{ + return (void *)((unsigned long)xdpf | BIT(0)); +} + +static struct xdp_frame *cpsw_handle_to_xdpf(void *handle) +{ + return (struct xdp_frame *)((unsigned long)handle & ~BIT(0)); +} + +struct __aligned(sizeof(long)) cpsw_meta_xdp { + struct net_device *ndev; + int ch; +}; + void cpsw_tx_handler(void *token, int len, int status) { + struct cpsw_meta_xdp *xmeta; + struct xdp_frame *xdpf; + struct net_device *ndev; struct netdev_queue *txq; - struct sk_buff *skb = token; - struct net_device *ndev = skb->dev; - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct sk_buff *skb; + int ch; + + if (cpsw_is_xdpf_handle(token)) { + xdpf = cpsw_handle_to_xdpf(token); + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET; + ndev = xmeta->ndev; + ch = xmeta->ch; + xdp_return_frame(xdpf); + } else { + skb = token; + ndev = skb->dev; + ch = skb_get_queue_mapping(skb); + cpts_tx_timestamp(ndev_to_cpsw(ndev)->cpts, skb); + dev_kfree_skb_any(skb); + } /* Check whether the queue is stopped due to stalled tx dma, if the * queue is stopped then start the queue as we have free desc for tx */ - txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb)); + txq = netdev_get_tx_queue(ndev, ch); if (unlikely(netif_tx_queue_stopped(txq))) netif_tx_wake_queue(txq); - cpts_tx_timestamp(cpsw->cpts, skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; - dev_kfree_skb_any(skb); } static void cpsw_rx_vlan_encap(struct sk_buff *skb) @@ -400,24 +447,252 @@ static void cpsw_rx_vlan_encap(struct sk_buff *skb) } } +static int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, + struct page *page) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_meta_xdp *xmeta; + struct cpdma_chan *txch; + dma_addr_t dma; + int ret, port; + + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET; + xmeta->ndev = priv->ndev; + xmeta->ch = 0; + txch = cpsw->txv[0].ch; + + port = priv->emac_port + cpsw->data.dual_emac; + if (page) { + dma = page_pool_get_dma_addr(page); + dma += xdpf->headroom + sizeof(struct xdp_frame); + ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf), + dma, xdpf->len, port); + } else { + if (sizeof(*xmeta) > xdpf->headroom) { + xdp_return_frame_rx_napi(xdpf); + return -EINVAL; + } + + ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf), + xdpf->data, xdpf->len, port); + } + + if (ret) { + priv->ndev->stats.tx_dropped++; + xdp_return_frame_rx_napi(xdpf); + } + + return ret; +} + +static int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, + struct page *page) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct net_device *ndev = priv->ndev; + int ret = CPSW_XDP_CONSUMED; + struct xdp_frame *xdpf; + struct bpf_prog *prog; + u32 act; + + rcu_read_lock(); + + prog = READ_ONCE(priv->xdp_prog); + if (!prog) { + ret = CPSW_XDP_PASS; + goto out; + } + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + ret = CPSW_XDP_PASS; + break; + case XDP_TX: + 
xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + goto drop; + + cpsw_xdp_tx_frame(priv, xdpf, page); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(ndev, xdp, prog)) + goto drop; + + /* Have to flush here, per packet, instead of doing it in bulk + * at the end of the napi handler. The RX devices on this + * particular hardware are sharing a common queue, so the + * incoming device might change per packet. + */ + xdp_do_flush_map(); + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + /* fall through -- handle aborts by dropping packet */ + case XDP_DROP: + goto drop; + } +out: + rcu_read_unlock(); + return ret; +drop: + rcu_read_unlock(); + page_pool_recycle_direct(cpsw->page_pool[ch], page); + return ret; +} + +static unsigned int cpsw_rxbuf_total_len(unsigned int len) +{ + len += CPSW_HEADROOM; + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + return SKB_DATA_ALIGN(len); +} + +static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw, + int size) +{ + struct page_pool_params pp_params; + struct page_pool *pool; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = size; + pp_params.nid = NUMA_NO_NODE; + pp_params.dma_dir = DMA_BIDIRECTIONAL; + pp_params.dev = cpsw->dev; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) + dev_err(cpsw->dev, "cannot create rx page pool\n"); + + return pool; +} + +static int cpsw_ndev_create_xdp_rxq(struct cpsw_priv *priv, int ch) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct xdp_rxq_info *rxq; + struct page_pool *pool; + int ret; + + pool = cpsw->page_pool[ch]; + rxq = &priv->xdp_rxq[ch]; + + ret = xdp_rxq_info_reg(rxq, priv->ndev, ch); + if (ret) + return ret; + + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); + if (ret) + xdp_rxq_info_unreg(rxq); + + return ret; +} + +static void cpsw_ndev_destroy_xdp_rxq(struct cpsw_priv *priv, int ch) +{ + struct xdp_rxq_info *rxq = &priv->xdp_rxq[ch]; + + if (!xdp_rxq_info_is_reg(rxq)) + return; + + xdp_rxq_info_unreg(rxq); +} + +static int cpsw_create_rx_pool(struct cpsw_common *cpsw, int ch) +{ + struct page_pool *pool; + int ret = 0, pool_size; + + pool_size = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch); + pool = cpsw_create_page_pool(cpsw, pool_size); + if (IS_ERR(pool)) + ret = PTR_ERR(pool); + else + cpsw->page_pool[ch] = pool; + + return ret; +} + +void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i, ch; + + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!ndev) + continue; + + cpsw_ndev_destroy_xdp_rxq(netdev_priv(ndev), ch); + } + + page_pool_destroy(cpsw->page_pool[ch]); + cpsw->page_pool[ch] = NULL; + } +} + +int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i, ch, ret; + + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + ret = cpsw_create_rx_pool(cpsw, ch); + if (ret) + goto err_cleanup; + + /* using the same page pool is allowed as no rx handlers run + * simultaneously for both ndevs + */ + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!ndev) + continue; + + ret = cpsw_ndev_create_xdp_rxq(netdev_priv(ndev), ch); + if (ret) + goto err_cleanup; + } + } + + return 0; + +err_cleanup: + cpsw_destroy_xdp_rxqs(cpsw); + + return ret; +} + static void cpsw_rx_handler(void *token, int len, int status) { - struct cpdma_chan *ch; - struct sk_buff 
*skb = token; - struct sk_buff *new_skb; - struct net_device *ndev = skb->dev; - int ret = 0, port; - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct page *new_page, *page = token; + void *pa = page_address(page); + struct cpsw_meta_xdp *xmeta = pa + CPSW_XMETA_OFFSET; + struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev); + int pkt_size = cpsw->rx_packet_max; + int ret = 0, port, ch = xmeta->ch; + int headroom = CPSW_HEADROOM; + struct net_device *ndev = xmeta->ndev; struct cpsw_priv *priv; + struct page_pool *pool; + struct sk_buff *skb; + struct xdp_buff xdp; + dma_addr_t dma; - if (cpsw->data.dual_emac) { + if (cpsw->data.dual_emac && status >= 0) { port = CPDMA_RX_SOURCE_PORT(status); - if (port) { + if (port) ndev = cpsw->slaves[--port].ndev; - skb->dev = ndev; - } } + priv = netdev_priv(ndev); + pool = cpsw->page_pool[ch]; if (unlikely(status < 0) || unlikely(!netif_running(ndev))) { /* In dual emac mode check for all interfaces */ if (cpsw->data.dual_emac && cpsw->usage_count && @@ -426,47 +701,88 @@ static void cpsw_rx_handler(void *token, int len, int status) * is already down and the other interface is up * and running, instead of freeing which results * in reducing of the number of rx descriptor in - * DMA engine, requeue skb back to cpdma. + * DMA engine, requeue page back to cpdma. */ - new_skb = skb; + new_page = page; goto requeue; } - /* the interface is going down, skbs are purged */ - dev_kfree_skb_any(skb); + /* the interface is going down, pages are purged */ + page_pool_recycle_direct(pool, page); return; } - new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max); - if (new_skb) { - skb_copy_queue_mapping(new_skb, skb); - skb_put(skb, len); - if (status & CPDMA_RX_VLAN_ENCAP) - cpsw_rx_vlan_encap(skb); - priv = netdev_priv(ndev); - if (priv->rx_ts_enabled) - cpts_rx_timestamp(cpsw->cpts, skb); - skb->protocol = eth_type_trans(skb, ndev); - netif_receive_skb(skb); - ndev->stats.rx_bytes += len; - ndev->stats.rx_packets++; - kmemleak_not_leak(new_skb); - } else { + new_page = page_pool_dev_alloc_pages(pool); + if (unlikely(!new_page)) { + new_page = page; ndev->stats.rx_dropped++; - new_skb = skb; + goto requeue; } -requeue: - if (netif_dormant(ndev)) { - dev_kfree_skb_any(new_skb); - return; + if (priv->xdp_prog) { + if (status & CPDMA_RX_VLAN_ENCAP) { + xdp.data = pa + CPSW_HEADROOM + + CPSW_RX_VLAN_ENCAP_HDR_SIZE; + xdp.data_end = xdp.data + len - + CPSW_RX_VLAN_ENCAP_HDR_SIZE; + } else { + xdp.data = pa + CPSW_HEADROOM; + xdp.data_end = xdp.data + len; + } + + xdp_set_data_meta_invalid(&xdp); + + xdp.data_hard_start = pa; + xdp.rxq = &priv->xdp_rxq[ch]; + + ret = cpsw_run_xdp(priv, ch, &xdp, page); + if (ret != CPSW_XDP_PASS) + goto requeue; + + /* XDP prog might have changed packet data and boundaries */ + len = xdp.data_end - xdp.data; + headroom = xdp.data - xdp.data_hard_start; + + /* XDP prog can modify vlan tag, so can't use encap header */ + status &= ~CPDMA_RX_VLAN_ENCAP; } - ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch; - ret = cpdma_chan_submit(ch, new_skb, new_skb->data, - skb_tailroom(new_skb), 0); - if (WARN_ON(ret < 0)) - dev_kfree_skb_any(new_skb); + /* pass skb to netstack if no XDP prog or returned XDP_PASS */ + skb = build_skb(pa, cpsw_rxbuf_total_len(pkt_size)); + if (!skb) { + ndev->stats.rx_dropped++; + page_pool_recycle_direct(pool, page); + goto requeue; + } + + skb_reserve(skb, headroom); + skb_put(skb, len); + skb->dev = ndev; + if (status & CPDMA_RX_VLAN_ENCAP) + cpsw_rx_vlan_encap(skb); + if (priv->rx_ts_enabled) + 
cpts_rx_timestamp(cpsw->cpts, skb); + skb->protocol = eth_type_trans(skb, ndev); + + /* unmap page as no netstack skb page recycling */ + page_pool_release_page(pool, page); + netif_receive_skb(skb); + + ndev->stats.rx_bytes += len; + ndev->stats.rx_packets++; + +requeue: + xmeta = page_address(new_page) + CPSW_XMETA_OFFSET; + xmeta->ndev = ndev; + xmeta->ch = ch; + + dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM; + ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma, + pkt_size, 0); + if (ret < 0) { + WARN_ON(ret == -ENOMEM); + page_pool_recycle_direct(pool, new_page); + } } void cpsw_split_res(struct cpsw_common *cpsw) @@ -1035,33 +1351,39 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) int cpsw_fill_rx_channels(struct cpsw_priv *priv) { struct cpsw_common *cpsw = priv->cpsw; - struct sk_buff *skb; + struct cpsw_meta_xdp *xmeta; + struct page_pool *pool; + struct page *page; int ch_buf_num; int ch, i, ret; + dma_addr_t dma; for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + pool = cpsw->page_pool[ch]; ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch); for (i = 0; i < ch_buf_num; i++) { - skb = __netdev_alloc_skb_ip_align(priv->ndev, - cpsw->rx_packet_max, - GFP_KERNEL); - if (!skb) { - cpsw_err(priv, ifup, "cannot allocate skb\n"); + page = page_pool_dev_alloc_pages(pool); + if (!page) { + cpsw_err(priv, ifup, "allocate rx page err\n"); return -ENOMEM; } - skb_set_queue_mapping(skb, ch); - ret = cpdma_chan_submit(cpsw->rxv[ch].ch, skb, - skb->data, skb_tailroom(skb), - 0); + xmeta = page_address(page) + CPSW_XMETA_OFFSET; + xmeta->ndev = priv->ndev; + xmeta->ch = ch; + + dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM; + ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch, + page, dma, + cpsw->rx_packet_max, + 0); if (ret < 0) { cpsw_err(priv, ifup, - "cannot submit skb to channel %d rx, error %d\n", + "cannot submit page to channel %d rx, error %d\n", ch, ret); - kfree_skb(skb); + page_pool_recycle_direct(pool, page); return ret; } - kmemleak_not_leak(skb); } cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n", @@ -1397,6 +1719,13 @@ static int cpsw_ndo_open(struct net_device *ndev) enable_irq(cpsw->irqs_table[0]); } + /* create rxqs for both infs in dual mac as they use same pool + * and must be destroyed together when no users. 
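*
* A minimal sketch of the page life cycle behind this (editor's
* illustration, built only from page_pool calls already used in this
* patch):
*
*	page = page_pool_dev_alloc_pages(pool);	/* RX refill */
*	dma = page_pool_get_dma_addr(page);	/* pool did the DMA mapping */
*	/* ...descriptor submitted, frame received... */
*	page_pool_recycle_direct(pool, page);	/* error / XDP_DROP path */
*	page_pool_release_page(pool, page);	/* page handed off to an skb */
*
* Both slave ndevs refill from the same per-channel pool, so a pool and
* its rxqs may only be torn down once neither interface uses them.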
+ */ + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret < 0) + goto err_cleanup; + ret = cpsw_fill_rx_channels(priv); if (ret < 0) goto err_cleanup; @@ -1423,7 +1752,11 @@ static int cpsw_ndo_open(struct net_device *ndev) return 0; err_cleanup: - cpdma_ctlr_stop(cpsw->dma); + if (!cpsw->usage_count) { + cpdma_ctlr_stop(cpsw->dma); + cpsw_destroy_xdp_rxqs(cpsw); + } + for_each_slave(priv, cpsw_slave_stop, cpsw); pm_runtime_put_sync(cpsw->dev); netif_carrier_off(priv->ndev); @@ -1447,6 +1780,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) cpsw_intr_disable(cpsw); cpdma_ctlr_stop(cpsw->dma); cpsw_ale_stop(cpsw->ale); + cpsw_destroy_xdp_rxqs(cpsw); } for_each_slave(priv, cpsw_slave_stop, cpsw); @@ -2004,6 +2338,64 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, } } +static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog; + + if (!priv->xdpi.prog && !prog) + return 0; + + if (!xdp_attachment_flags_ok(&priv->xdpi, bpf)) + return -EBUSY; + + WRITE_ONCE(priv->xdp_prog, prog); + + xdp_attachment_setup(&priv->xdpi, bpf); + + return 0; +} + +static int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return cpsw_xdp_prog_setup(priv, bpf); + + case XDP_QUERY_PROG: + return xdp_attachment_query(&priv->xdpi, bpf); + + default: + return -EINVAL; + } +} + +static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct xdp_frame *xdpf; + int i, drops = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + for (i = 0; i < n; i++) { + xdpf = frames[i]; + if (xdpf->len < CPSW_MIN_PACKET_SIZE) { + xdp_return_frame_rx_napi(xdpf); + drops++; + continue; + } + + if (cpsw_xdp_tx_frame(priv, xdpf, NULL)) + drops++; + } + + return n - drops; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void cpsw_ndo_poll_controller(struct net_device *ndev) { @@ -2032,6 +2424,8 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid, .ndo_setup_tc = cpsw_ndo_setup_tc, + .ndo_bpf = cpsw_ndo_bpf, + .ndo_xdp_xmit = cpsw_ndo_xdp_xmit, }; static void cpsw_get_drvinfo(struct net_device *ndev, @@ -2179,6 +2573,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, return ret; } + slave_data->slave_node = slave_node; slave_data->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", &lenp); @@ -2262,8 +2657,7 @@ no_phy_slave: static void cpsw_remove_dt(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_common *cpsw = platform_get_drvdata(pdev); struct cpsw_platform_data *data = &cpsw->data; struct device_node *node = pdev->dev.of_node; struct device_node *slave_node; @@ -2330,6 +2724,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) /* register the network device */ SET_NETDEV_DEV(ndev, cpsw->dev); + ndev->dev.of_node = cpsw->slaves[1].data->slave_node; ret = register_netdev(ndev); if (ret) dev_err(cpsw->dev, "cpsw: error registering net device\n"); @@ -2474,7 +2869,7 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_cpts; } - platform_set_drvdata(pdev, ndev); + platform_set_drvdata(pdev, cpsw); priv = netdev_priv(ndev); priv->cpsw = cpsw; 
priv->ndev = ndev; @@ -2507,6 +2902,7 @@ static int cpsw_probe(struct platform_device *pdev) /* register the network device */ SET_NETDEV_DEV(ndev, dev); + ndev->dev.of_node = cpsw->slaves[0].data->slave_node; ret = register_netdev(ndev); if (ret) { dev_err(dev, "error registering net device\n"); @@ -2567,9 +2963,8 @@ clean_runtime_disable_ret: static int cpsw_remove(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - int ret; + struct cpsw_common *cpsw = platform_get_drvdata(pdev); + int i, ret; ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { @@ -2577,9 +2972,9 @@ static int cpsw_remove(struct platform_device *pdev) return ret; } - if (cpsw->data.dual_emac) - unregister_netdev(cpsw->slaves[1].ndev); - unregister_netdev(ndev); + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev) + unregister_netdev(cpsw->slaves[i].ndev); cpts_release(cpsw->cpts); cpdma_ctlr_destroy(cpsw->dma); @@ -2592,20 +2987,13 @@ static int cpsw_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int cpsw_suspend(struct device *dev) { - struct net_device *ndev = dev_get_drvdata(dev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - - if (cpsw->data.dual_emac) { - int i; + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; - for (i = 0; i < cpsw->data.slaves; i++) { + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev) if (netif_running(cpsw->slaves[i].ndev)) cpsw_ndo_stop(cpsw->slaves[i].ndev); - } - } else { - if (netif_running(ndev)) - cpsw_ndo_stop(ndev); - } /* Select sleep pin state */ pinctrl_pm_select_sleep_state(dev); @@ -2615,25 +3003,20 @@ static int cpsw_suspend(struct device *dev) static int cpsw_resume(struct device *dev) { - struct net_device *ndev = dev_get_drvdata(dev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; /* Select default pin state */ pinctrl_pm_select_default_state(dev); /* shut up ASSERT_RTNL() warning in netif_set_real_num_tx/rx_queues */ rtnl_lock(); - if (cpsw->data.dual_emac) { - int i; - for (i = 0; i < cpsw->data.slaves; i++) { + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev) if (netif_running(cpsw->slaves[i].ndev)) cpsw_ndo_open(cpsw->slaves[i].ndev); - } - } else { - if (netif_running(ndev)) - cpsw_ndo_open(ndev); - } + rtnl_unlock(); return 0; diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index 6d1c9ebae7cc..31248a6cc642 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -458,21 +458,22 @@ int cpsw_nway_reset(struct net_device *ndev) static void cpsw_suspend_data_pass(struct net_device *ndev) { struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - struct cpsw_slave *slave; int i; /* Disable NAPI scheduling */ cpsw_intr_disable(cpsw); /* Stop all transmit queues for every network device. - * Disable re-using rx descriptors with dormant_on. 
*/ - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { - if (!(slave->ndev && netif_running(slave->ndev))) + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!(ndev && netif_running(ndev))) continue; - netif_tx_stop_all_queues(slave->ndev); - netif_dormant_on(slave->ndev); + netif_tx_stop_all_queues(ndev); + + /* Barrier, so that stop_queue visible to other cpus */ + smp_mb__after_atomic(); } /* Handle rest of tx packets and stop cpdma channels */ @@ -483,14 +484,8 @@ static int cpsw_resume_data_pass(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; - struct cpsw_slave *slave; int i, ret; - /* Allow rx packets handling */ - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) - if (slave->ndev && netif_running(slave->ndev)) - netif_dormant_off(slave->ndev); - /* After this receive is started */ if (cpsw->usage_count) { ret = cpsw_fill_rx_channels(priv); @@ -502,9 +497,11 @@ static int cpsw_resume_data_pass(struct net_device *ndev) } /* Resume transmit for every affected interface */ - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) - if (slave->ndev && netif_running(slave->ndev)) - netif_tx_start_all_queues(slave->ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (ndev && netif_running(ndev)) + netif_tx_start_all_queues(ndev); + } return 0; } @@ -581,14 +578,26 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx, return 0; } +static void cpsw_fail(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i; + + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (ndev) + dev_close(ndev); + } +} + int cpsw_set_channels_common(struct net_device *ndev, struct ethtool_channels *chs, cpdma_handler_fn rx_handler) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; - struct cpsw_slave *slave; - int i, ret; + struct net_device *sl_ndev; + int i, new_pools, ret; ret = cpsw_check_ch_settings(cpsw, chs); if (ret < 0) @@ -596,6 +605,8 @@ int cpsw_set_channels_common(struct net_device *ndev, cpsw_suspend_data_pass(ndev); + new_pools = (chs->rx_count != cpsw->rx_ch_num) && cpsw->usage_count; + ret = cpsw_update_channels_res(priv, chs->rx_count, 1, rx_handler); if (ret) goto err; @@ -604,35 +615,40 @@ int cpsw_set_channels_common(struct net_device *ndev, if (ret) goto err; - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { - if (!(slave->ndev && netif_running(slave->ndev))) + for (i = 0; i < cpsw->data.slaves; i++) { + sl_ndev = cpsw->slaves[i].ndev; + if (!(sl_ndev && netif_running(sl_ndev))) continue; /* Inform stack about new count of queues */ - ret = netif_set_real_num_tx_queues(slave->ndev, - cpsw->tx_ch_num); + ret = netif_set_real_num_tx_queues(sl_ndev, cpsw->tx_ch_num); if (ret) { dev_err(priv->dev, "cannot set real number of tx queues\n"); goto err; } - ret = netif_set_real_num_rx_queues(slave->ndev, - cpsw->rx_ch_num); + ret = netif_set_real_num_rx_queues(sl_ndev, cpsw->rx_ch_num); if (ret) { dev_err(priv->dev, "cannot set real number of rx queues\n"); goto err; } } - if (cpsw->usage_count) - cpsw_split_res(cpsw); + cpsw_split_res(cpsw); + + if (new_pools) { + cpsw_destroy_xdp_rxqs(cpsw); + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret) + goto err; + } ret = cpsw_resume_data_pass(ndev); if (!ret) return 0; err: dev_err(priv->dev, "cannot update channels number, closing device\n"); - dev_close(ndev); + 
cpsw_fail(cpsw); return ret; } @@ -652,9 +668,8 @@ void cpsw_get_ringparam(struct net_device *ndev, int cpsw_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *ering) { - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_common *cpsw = priv->cpsw; - int ret; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + int descs_num, ret; /* ignore ering->tx_pending - only rx_pending adjustment is supported */ @@ -663,22 +678,34 @@ int cpsw_set_ringparam(struct net_device *ndev, ering->rx_pending > (cpsw->descs_pool_size - CPSW_MAX_QUEUES)) return -EINVAL; - if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma)) + descs_num = cpdma_get_num_rx_descs(cpsw->dma); + if (ering->rx_pending == descs_num) return 0; cpsw_suspend_data_pass(ndev); - cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); + ret = cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); + if (ret) { + if (cpsw_resume_data_pass(ndev)) + goto err; + + return ret; + } - if (cpsw->usage_count) - cpdma_chan_split_pool(cpsw->dma); + if (cpsw->usage_count) { + cpsw_destroy_xdp_rxqs(cpsw); + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret) + goto err; + } ret = cpsw_resume_data_pass(ndev); if (!ret) return 0; - +err: + cpdma_set_num_rx_descs(cpsw->dma, descs_num); dev_err(cpsw->dev, "cannot set ring params, closing device\n"); - dev_close(ndev); + cpsw_fail(cpsw); return ret; } diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 04795b97ee71..362c5a986869 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -272,6 +272,7 @@ struct cpsw_host_regs { }; struct cpsw_slave_data { + struct device_node *slave_node; struct device_node *phy_node; char phy_id[MII_BUS_ID_SIZE]; int phy_if; @@ -346,6 +347,7 @@ struct cpsw_common { int rx_ch_num, tx_ch_num; int speed; int usage_count; + struct page_pool *page_pool[CPSW_MAX_QUEUES]; }; struct cpsw_priv { @@ -360,6 +362,10 @@ struct cpsw_priv { int shp_cfg_speed; int tx_ts_enabled; int rx_ts_enabled; + struct bpf_prog *xdp_prog; + struct xdp_rxq_info xdp_rxq[CPSW_MAX_QUEUES]; + struct xdp_attachment_info xdpi; + u32 emac_port; struct cpsw_common *cpsw; }; @@ -391,6 +397,8 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv); void cpsw_intr_enable(struct cpsw_common *cpsw); void cpsw_intr_disable(struct cpsw_common *cpsw); void cpsw_tx_handler(void *token, int len, int status); +int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw); +void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw); /* ethtool */ u32 cpsw_get_msglevel(struct net_device *ndev); diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index e257018ada71..61136428e2c0 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -5,6 +5,7 @@ * Copyright (C) 2012 Richard Cochran <richardcochran@gmail.com> * */ +#include <linux/clk-provider.h> #include <linux/err.h> #include <linux/if.h> #include <linux/hrtimer.h> @@ -532,6 +533,82 @@ static void cpts_calc_mult_shift(struct cpts *cpts) freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC)); } +static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node) +{ + struct device_node *refclk_np; + const char **parent_names; + unsigned int num_parents; + struct clk_hw *clk_hw; + int ret = -EINVAL; + u32 *mux_table; + + refclk_np = of_get_child_by_name(node, "cpts-refclk-mux"); + if (!refclk_np) + /* refclk selection is not supported on all SoCs */ + return 0; + + num_parents = of_clk_get_parent_count(refclk_np); + if (num_parents < 1) { 
dev_err(cpts->dev, "mux-clock %s must have parents\n", + refclk_np->name); + goto mux_fail; + } + + parent_names = devm_kzalloc(cpts->dev, (sizeof(char *) * num_parents), + GFP_KERNEL); + + mux_table = devm_kzalloc(cpts->dev, sizeof(*mux_table) * num_parents, + GFP_KERNEL); + if (!mux_table || !parent_names) { + ret = -ENOMEM; + goto mux_fail; + } + + of_clk_parent_fill(refclk_np, parent_names, num_parents); + + ret = of_property_read_variable_u32_array(refclk_np, "ti,mux-tbl", + mux_table, + num_parents, num_parents); + if (ret < 0) + goto mux_fail; + + clk_hw = clk_hw_register_mux_table(cpts->dev, refclk_np->name, + parent_names, num_parents, + 0, + &cpts->reg->rftclk_sel, 0, 0x1F, + 0, mux_table, NULL); + if (IS_ERR(clk_hw)) { + ret = PTR_ERR(clk_hw); + goto mux_fail; + } + + ret = devm_add_action_or_reset(cpts->dev, + (void(*)(void *))clk_hw_unregister_mux, + clk_hw); + if (ret) { + dev_err(cpts->dev, "add clkmux unreg action %d", ret); + goto mux_fail; + } + + ret = of_clk_add_hw_provider(refclk_np, of_clk_hw_simple_get, clk_hw); + if (ret) + goto mux_fail; + + ret = devm_add_action_or_reset(cpts->dev, + (void(*)(void *))of_clk_del_provider, + refclk_np); + if (ret) { + dev_err(cpts->dev, "add clkmux provider unreg action %d", ret); + goto mux_fail; + } + + return ret; + +mux_fail: + of_node_put(refclk_np); + return ret; +} + static int cpts_of_parse(struct cpts *cpts, struct device_node *node) { int ret = -EINVAL; @@ -547,7 +624,7 @@ static int cpts_of_parse(struct cpts *cpts, struct device_node *node) (!cpts->cc.mult && cpts->cc.shift)) goto of_error; - return 0; + return cpts_of_mux_clk_setup(cpts, node); of_error: dev_err(cpts->dev, "CPTS: Missing property in the DT.\n"); @@ -572,9 +649,14 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, if (ret) return ERR_PTR(ret); - cpts->refclk = devm_clk_get(dev, "cpts"); + cpts->refclk = devm_get_clk_from_child(dev, node, "cpts"); + if (IS_ERR(cpts->refclk)) + /* try get clk from dev node for compatibility */ + cpts->refclk = devm_clk_get(dev, "cpts"); + if (IS_ERR(cpts->refclk)) { - dev_err(dev, "Failed to get cpts refclk\n"); + dev_err(dev, "Failed to get cpts refclk %ld\n", + PTR_ERR(cpts->refclk)); return ERR_CAST(cpts->refclk); } diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h index 024aab6af12f..bb997c11ee15 100644 --- a/drivers/net/ethernet/ti/cpts.h +++ b/drivers/net/ethernet/ti/cpts.h @@ -24,7 +24,7 @@ struct cpsw_cpts { u32 idver; /* Identification and version */ u32 control; /* Time sync control */ - u32 res1; + u32 rftclk_sel; /* Reference Clock Select Register */ u32 ts_push; /* Time stamp event push */ u32 ts_load_val; /* Time stamp load value */ u32 ts_load_en; /* Time stamp load enable */ diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 35bf14d8e7af..0ca2a1a254de 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -134,6 +134,15 @@ struct cpdma_control_info { #define ACCESS_RW (ACCESS_RO | ACCESS_WO) }; +struct submit_info { + struct cpdma_chan *chan; + int directed; + void *token; + void *data; + int flags; + int len; +}; + static struct cpdma_control_info controls[] = { [CPDMA_TX_RLIM] = {CPDMA_DMACONTROL, 8, 0xffff, ACCESS_RW}, [CPDMA_CMD_IDLE] = {CPDMA_DMACONTROL, 3, 1, ACCESS_WO}, @@ -176,6 +185,8 @@ static struct cpdma_control_info controls[] = { (directed << CPDMA_TO_PORT_SHIFT)); \ } while (0) +#define CPDMA_DMA_EXT_MAP BIT(16) + static void 
cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr) { struct cpdma_desc_pool *pool = ctlr->pool; @@ -1002,34 +1013,26 @@ static void __cpdma_chan_submit(struct cpdma_chan *chan, } } -int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, - int len, int directed) +static int cpdma_chan_submit_si(struct submit_info *si) { + struct cpdma_chan *chan = si->chan; struct cpdma_ctlr *ctlr = chan->ctlr; + int len = si->len; + int swlen = len; struct cpdma_desc __iomem *desc; dma_addr_t buffer; - unsigned long flags; u32 mode; - int ret = 0; - - spin_lock_irqsave(&chan->lock, flags); - - if (chan->state == CPDMA_STATE_TEARDOWN) { - ret = -EINVAL; - goto unlock_ret; - } + int ret; if (chan->count >= chan->desc_num) { chan->stats.desc_alloc_fail++; - ret = -ENOMEM; - goto unlock_ret; + return -ENOMEM; } desc = cpdma_desc_alloc(ctlr->pool); if (!desc) { chan->stats.desc_alloc_fail++; - ret = -ENOMEM; - goto unlock_ret; + return -ENOMEM; } if (len < ctlr->params.min_packet_size) { @@ -1037,16 +1040,21 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, chan->stats.runt_transmit_buff++; } - buffer = dma_map_single(ctlr->dev, data, len, chan->dir); - ret = dma_mapping_error(ctlr->dev, buffer); - if (ret) { - cpdma_desc_free(ctlr->pool, desc, 1); - ret = -EINVAL; - goto unlock_ret; - } - mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP; - cpdma_desc_to_port(chan, mode, directed); + cpdma_desc_to_port(chan, mode, si->directed); + + if (si->flags & CPDMA_DMA_EXT_MAP) { + buffer = (dma_addr_t)si->data; + dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir); + swlen |= CPDMA_DMA_EXT_MAP; + } else { + buffer = dma_map_single(ctlr->dev, si->data, len, chan->dir); + ret = dma_mapping_error(ctlr->dev, buffer); + if (ret) { + cpdma_desc_free(ctlr->pool, desc, 1); + return -EINVAL; + } + } /* Relaxed IO accessors can be used here as there is read barrier * at the end of write sequence. 
@@ -1055,9 +1063,9 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, writel_relaxed(buffer, &desc->hw_buffer); writel_relaxed(len, &desc->hw_len); writel_relaxed(mode | len, &desc->hw_mode); - writel_relaxed((uintptr_t)token, &desc->sw_token); + writel_relaxed((uintptr_t)si->token, &desc->sw_token); writel_relaxed(buffer, &desc->sw_buffer); - writel_relaxed(len, &desc->sw_len); + writel_relaxed(swlen, &desc->sw_len); desc_read(desc, sw_len); __cpdma_chan_submit(chan, desc); @@ -1066,8 +1074,105 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, chan_write(chan, rxfree, 1); chan->count++; + return 0; +} -unlock_ret: +int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, + int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = data; + si.len = len; + si.directed = directed; + si.flags = 0; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state == CPDMA_STATE_TEARDOWN) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = (void *)data; + si.len = len; + si.directed = directed; + si.flags = CPDMA_DMA_EXT_MAP; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state == CPDMA_STATE_TEARDOWN) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, + int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = data; + si.len = len; + si.directed = directed; + si.flags = 0; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state != CPDMA_STATE_ACTIVE) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = (void *)data; + si.len = len; + si.directed = directed; + si.flags = CPDMA_DMA_EXT_MAP; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state != CPDMA_STATE_ACTIVE) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); spin_unlock_irqrestore(&chan->lock, flags); return ret; } @@ -1097,10 +1202,17 @@ static void __cpdma_chan_free(struct cpdma_chan *chan, uintptr_t token; token = desc_read(desc, sw_token); - buff_dma = desc_read(desc, sw_buffer); origlen = desc_read(desc, sw_len); - dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir); + buff_dma = desc_read(desc, sw_buffer); + if (origlen & CPDMA_DMA_EXT_MAP) { + origlen &= ~CPDMA_DMA_EXT_MAP; + dma_sync_single_for_cpu(ctlr->dev, buff_dma, origlen, + chan->dir); + } else { + dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir); + } + cpdma_desc_free(pool, desc, 1); (*chan->handler)((void *)token, outlen, status); } @@ -1311,8 +1423,23 @@ int cpdma_get_num_tx_descs(struct 
cpdma_ctlr *ctlr) return ctlr->num_tx_desc; } -void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) +int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) { + unsigned long flags; + int temp, ret; + + spin_lock_irqsave(&ctlr->lock, flags); + + temp = ctlr->num_rx_desc; ctlr->num_rx_desc = num_rx_desc; ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc; + ret = cpdma_chan_split_pool(ctlr); + if (ret) { + ctlr->num_rx_desc = temp; + ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc; + } + + spin_unlock_irqrestore(&ctlr->lock, flags); + + return ret; } diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 10376062dafa..d3cfe234d16a 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -77,8 +77,14 @@ int cpdma_chan_stop(struct cpdma_chan *chan); int cpdma_chan_get_stats(struct cpdma_chan *chan, struct cpdma_chan_stats *stats); +int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed); int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, int len, int directed); +int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed); +int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, + int len, int directed); int cpdma_chan_process(struct cpdma_chan *chan, int quota); int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable); @@ -110,8 +116,7 @@ enum cpdma_control { int cpdma_control_get(struct cpdma_ctlr *ctlr, int control); int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value); int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr); -void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc); +int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc); int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr); -int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr); #endif diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 4bf65cab79e6..5f4ece0d5a73 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1428,8 +1428,8 @@ static int emac_dev_open(struct net_device *ndev) if (!skb) break; - ret = cpdma_chan_submit(priv->rxchan, skb, skb->data, - skb_tailroom(skb), 0); + ret = cpdma_chan_idle_submit(priv->rxchan, skb, skb->data, + skb_tailroom(skb), 0); if (WARN_ON(ret < 0)) break; } diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index ec179700c184..2c1fac33136c 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3554,7 +3554,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, struct device_node *node, void **inst_priv) { - struct device_node *interfaces, *interface; + struct device_node *interfaces, *interface, *cpts_node; struct device_node *secondary_ports; struct cpsw_ale_params ale_params; struct gbe_priv *gbe_dev; @@ -3713,7 +3713,12 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, dev_dbg(gbe_dev->dev, "Created a gbe ale engine\n"); } - gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, node); + cpts_node = of_get_child_by_name(node, "cpts"); + if (!cpts_node) + cpts_node = of_node_get(node); + + gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, cpts_node); + 
of_node_put(cpts_node); if (IS_ENABLED(CONFIG_TI_CPTS) && IS_ERR(gbe_dev->cpts)) { ret = PTR_ERR(gbe_dev->cpts); goto free_sec_ports; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 3ecddb72f45a..051033580f0a 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -301,7 +301,7 @@ struct gelic_card { */ unsigned int irq; struct gelic_descr *tx_top, *rx_top; - struct gelic_descr descr[0]; /* must be the last */ + struct gelic_descr descr[]; /* must be the last */ }; struct gelic_port { diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h index c0ecc6c7b5e0..cdfe7809e3c1 100644 --- a/drivers/net/ethernet/via/via-velocity.h +++ b/drivers/net/ethernet/via/via-velocity.h @@ -1509,7 +1509,7 @@ static inline int velocity_get_ip(struct velocity_info *vptr) rcu_read_lock(); in_dev = __in_dev_get_rcu(vptr->netdev); if (in_dev != NULL) { - ifa = (struct in_ifaddr *) in_dev->ifa_list; + ifa = rcu_dereference(in_dev->ifa_list); if (ifa != NULL) { memcpy(vptr->ip_addr, &ifa->ifa_address, 4); res = 0; diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 918b3e50850a..2b4126d2427d 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -15,6 +15,7 @@ #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/of_net.h> +#include <linux/of_device.h> #include <linux/spi/spi.h> #include "w5100.h" @@ -409,14 +410,32 @@ static const struct w5100_ops w5500_ops = { .init = w5500_spi_init, }; +static const struct of_device_id w5100_of_match[] = { + { .compatible = "wiznet,w5100", .data = (const void*)W5100, }, + { .compatible = "wiznet,w5200", .data = (const void*)W5200, }, + { .compatible = "wiznet,w5500", .data = (const void*)W5500, }, + { }, +}; +MODULE_DEVICE_TABLE(of, w5100_of_match); + static int w5100_spi_probe(struct spi_device *spi) { - const struct spi_device_id *id = spi_get_device_id(spi); + const struct of_device_id *of_id; const struct w5100_ops *ops; + kernel_ulong_t driver_data; int priv_size; const void *mac = of_get_mac_address(spi->dev.of_node); - switch (id->driver_data) { + if (spi->dev.of_node) { + of_id = of_match_device(w5100_of_match, &spi->dev); + if (!of_id) + return -ENODEV; + driver_data = (kernel_ulong_t)of_id->data; + } else { + driver_data = spi_get_device_id(spi)->driver_data; + } + + switch (driver_data) { case W5100: ops = &w5100_spi_ops; priv_size = 0; @@ -453,6 +472,7 @@ static struct spi_driver w5100_spi_driver = { .driver = { .name = "w5100", .pm = &w5100_pm_ops, + .of_match_table = w5100_of_match, }, .probe = w5100_spi_probe, .remove = w5100_spi_remove, diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index af96e05c5bcd..8d994cebb6b0 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_XILINX bool "Xilinx devices" default y - depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS || X86 || COMPILE_TEST + depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS || X86 || ARM || COMPILE_TEST ---help--- If you have a network (Ethernet) card belonging to this class, say Y. 
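The w5100-spi probe rework above shows the usual pattern for a SPI driver that can be bound either through the device tree or through legacy SPI board info: the chip variant is resolved from whichever match table actually fired. A minimal, self-contained sketch of that dual lookup, using hypothetical names (foo_of_match, FOO_A/FOO_B, "vendor,foo-*") rather than the real w5100 sources:

	#include <linux/mod_devicetable.h>
	#include <linux/module.h>
	#include <linux/of_device.h>
	#include <linux/spi/spi.h>

	enum foo_chip { FOO_A, FOO_B };

	static const struct of_device_id foo_of_match[] = {
		{ .compatible = "vendor,foo-a", .data = (const void *)FOO_A },
		{ .compatible = "vendor,foo-b", .data = (const void *)FOO_B },
		{ }
	};
	MODULE_DEVICE_TABLE(of, foo_of_match);

	static int foo_spi_probe(struct spi_device *spi)
	{
		kernel_ulong_t chip;

		if (spi->dev.of_node) {
			/* Bound via DT: the compatible string selects the variant */
			const struct of_device_id *of_id =
				of_match_device(foo_of_match, &spi->dev);

			if (!of_id)
				return -ENODEV;
			chip = (kernel_ulong_t)of_id->data;
		} else {
			/* Legacy binding: the spi_device_id carries the variant */
			chip = spi_get_device_id(spi)->driver_data;
		}

		dev_info(&spi->dev, "probing variant %lu\n", (unsigned long)chip);
		return 0;
	}

On kernels that provide device_get_match_data() the OF branch collapses to a single call, but the legacy spi_device_id path still needs the explicit fallback shown here.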
@@ -26,8 +26,8 @@ config XILINX_EMACLITE config XILINX_AXI_EMAC tristate "Xilinx 10/100/1000 AXI Ethernet support" - depends on MICROBLAZE - select PHYLIB + depends on MICROBLAZE || X86 || ARM || COMPILE_TEST + select PHYLINK ---help--- This driver supports the 10/100/1000 Ethernet from Xilinx for the AXI bus interface used in Xilinx Virtex FPGAs. diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 1aeda084b8f1..276292bca334 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -361,7 +361,7 @@ struct temac_local { /* For synchronization of indirect register access. Must be * shared mutex between interfaces in same TEMAC block. */ - struct mutex *indirect_mutex; + spinlock_t *indirect_lock; u32 options; /* Current options word */ int last_link; unsigned int temac_features; @@ -388,8 +388,9 @@ struct temac_local { /* xilinx_temac.c */ int temac_indirect_busywait(struct temac_local *lp); u32 temac_indirect_in32(struct temac_local *lp, int reg); +u32 temac_indirect_in32_locked(struct temac_local *lp, int reg); void temac_indirect_out32(struct temac_local *lp, int reg, u32 value); - +void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value); /* xilinx_temac_mdio.c */ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 14870d659f7d..21c1b4322ea7 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -22,7 +22,6 @@ * * TODO: * - Factor out locallink DMA code into separate driver - * - Fix multicast assignment. * - Fix support for hardware checksumming. * - Testing. Lots and lots of testing. * @@ -53,6 +52,7 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/dma-mapping.h> +#include <linux/processor.h> #include <linux/platform_data/xilinx-ll-temac.h> #include "ll_temac.h" @@ -84,51 +84,118 @@ static void _temac_iow_le(struct temac_local *lp, int offset, u32 value) return iowrite32(value, lp->regs + offset); } +static bool hard_acs_rdy(struct temac_local *lp) +{ + return temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK; +} + +static bool hard_acs_rdy_or_timeout(struct temac_local *lp, ktime_t timeout) +{ + ktime_t cur = ktime_get(); + + return hard_acs_rdy(lp) || ktime_after(cur, timeout); +} + +/* Poll for maximum 20 ms. This is similar to the 2 jiffies @ 100 Hz + * that was used before, and should cover MDIO bus speed down to 3200 + * Hz. + */ +#define HARD_ACS_RDY_POLL_NS (20 * NSEC_PER_MSEC) + +/** + * temac_indirect_busywait - Wait for current indirect register access + * to complete. + */ int temac_indirect_busywait(struct temac_local *lp) { - unsigned long end = jiffies + 2; + ktime_t timeout = ktime_add_ns(ktime_get(), HARD_ACS_RDY_POLL_NS); - while (!(temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK)) { - if (time_before_eq(end, jiffies)) { - WARN_ON(1); - return -ETIMEDOUT; - } - usleep_range(500, 1000); - } - return 0; + spin_until_cond(hard_acs_rdy_or_timeout(lp, timeout)); + if (WARN_ON(!hard_acs_rdy(lp))) + return -ETIMEDOUT; + else + return 0; } /** - * temac_indirect_in32 - * - * lp->indirect_mutex must be held when calling this function + * temac_indirect_in32 - Indirect register read access. This function + * must be called without lp->indirect_lock being held. 
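+ * Takes and releases lp->indirect_lock internally, so it is safe for
+ * one-off register accesses; sequences of accesses should hold the
+ * lock and use the _locked variants below instead.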
*/ u32 temac_indirect_in32(struct temac_local *lp, int reg) { - u32 val; + unsigned long flags; + int val; + + spin_lock_irqsave(lp->indirect_lock, flags); + val = temac_indirect_in32_locked(lp, reg); + spin_unlock_irqrestore(lp->indirect_lock, flags); + return val; +} - if (temac_indirect_busywait(lp)) +/** + * temac_indirect_in32_locked - Indirect register read access. This + * function must be called with lp->indirect_lock being held. Use + * this together with spin_lock_irqsave/spin_unlock_irqrestore to avoid + * repeated lock/unlock and to ensure uninterrupted access to indirect + * registers. + */ +u32 temac_indirect_in32_locked(struct temac_local *lp, int reg) +{ + /* This initial wait should normally not spin, as we always + * try to wait for indirect access to complete before + * releasing the indirect_lock. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return -ETIMEDOUT; + /* Initiate read from indirect register */ temac_iow(lp, XTE_CTL0_OFFSET, reg); - if (temac_indirect_busywait(lp)) + /* Wait for indirect register access to complete. We really + * should not see timeouts, and could even end up causing + * problems for the following indirect access, so let's make a bit + * of WARN noise. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return -ETIMEDOUT; - val = temac_ior(lp, XTE_LSW0_OFFSET); - - return val; + /* Value is ready now */ + return temac_ior(lp, XTE_LSW0_OFFSET); } /** - * temac_indirect_out32 - * - * lp->indirect_mutex must be held when calling this function + * temac_indirect_out32 - Indirect register write access. This function + * must be called without lp->indirect_lock being held. */ void temac_indirect_out32(struct temac_local *lp, int reg, u32 value) { - if (temac_indirect_busywait(lp)) + unsigned long flags; + + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, reg, value); + spin_unlock_irqrestore(lp->indirect_lock, flags); +} + +/** + * temac_indirect_out32_locked - Indirect register write access. This + * function must be called with lp->indirect_lock being held. Use + * this together with spin_lock_irqsave/spin_unlock_irqrestore to avoid + * repeated lock/unlock and to ensure uninterrupted access to indirect + * registers. + */ +void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value) +{ + /* As in temac_indirect_in32_locked(), we should normally not + * spin here. And if it happens, we actually end up silently + * ignoring the write request. Ouch. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return; + /* Initiate write to indirect register */ temac_iow(lp, XTE_LSW0_OFFSET, value); temac_iow(lp, XTE_CTL0_OFFSET, CNTLREG_WRITE_ENABLE_MASK | reg); - temac_indirect_busywait(lp); + /* As in temac_indirect_in32_locked(), we should not see timeouts * here. And if it happens, we continue before the write has * completed. Not good. 
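+ * Either way the function returns void, so callers cannot detect a
+ * failed write beyond the WARN noise.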
+ */ + WARN_ON(temac_indirect_busywait(lp)); } /** @@ -344,20 +411,21 @@ out: static void temac_do_set_mac_address(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); + unsigned long flags; /* set up unicast MAC address filter set its mac address */ - mutex_lock(lp->indirect_mutex); - temac_indirect_out32(lp, XTE_UAW0_OFFSET, - (ndev->dev_addr[0]) | - (ndev->dev_addr[1] << 8) | - (ndev->dev_addr[2] << 16) | - (ndev->dev_addr[3] << 24)); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_UAW0_OFFSET, + (ndev->dev_addr[0]) | + (ndev->dev_addr[1] << 8) | + (ndev->dev_addr[2] << 16) | + (ndev->dev_addr[3] << 24)); /* There are reserved bits in EUAW1 * so don't affect them Set MAC bits [47:32] in EUAW1 */ - temac_indirect_out32(lp, XTE_UAW1_OFFSET, - (ndev->dev_addr[4] & 0x000000ff) | - (ndev->dev_addr[5] << 8)); - mutex_unlock(lp->indirect_mutex); + temac_indirect_out32_locked(lp, XTE_UAW1_OFFSET, + (ndev->dev_addr[4] & 0x000000ff) | + (ndev->dev_addr[5] << 8)); + spin_unlock_irqrestore(lp->indirect_lock, flags); } static int temac_init_mac_address(struct net_device *ndev, const void *address) @@ -383,49 +451,58 @@ static int temac_set_mac_address(struct net_device *ndev, void *p) static void temac_set_multicast_list(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); - u32 multi_addr_msw, multi_addr_lsw, val; - int i; + u32 multi_addr_msw, multi_addr_lsw; + int i = 0; + unsigned long flags; + bool promisc_mode_disabled = false; - mutex_lock(lp->indirect_mutex); - if (ndev->flags & (IFF_ALLMULTI | IFF_PROMISC) || - netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM) { - /* - * We must make the kernel realise we had to move - * into promisc mode or we start all out war on - * the cable. If it was a promisc request the - * flag is already set. If not we assert it. 
- */ - ndev->flags |= IFF_PROMISC; + if (ndev->flags & (IFF_PROMISC | IFF_ALLMULTI) || + (netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM)) { temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK); dev_info(&ndev->dev, "Promiscuous mode enabled.\n"); - } else if (!netdev_mc_empty(ndev)) { + return; + } + + spin_lock_irqsave(lp->indirect_lock, flags); + + if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; - i = 0; netdev_for_each_mc_addr(ha, ndev) { - if (i >= MULTICAST_CAM_TABLE_NUM) + if (WARN_ON(i >= MULTICAST_CAM_TABLE_NUM)) break; multi_addr_msw = ((ha->addr[3] << 24) | (ha->addr[2] << 16) | (ha->addr[1] << 8) | (ha->addr[0])); - temac_indirect_out32(lp, XTE_MAW0_OFFSET, - multi_addr_msw); + temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, + multi_addr_msw); multi_addr_lsw = ((ha->addr[5] << 8) | (ha->addr[4]) | (i << 16)); - temac_indirect_out32(lp, XTE_MAW1_OFFSET, - multi_addr_lsw); + temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, + multi_addr_lsw); i++; } - } else { - val = temac_indirect_in32(lp, XTE_AFM_OFFSET); - temac_indirect_out32(lp, XTE_AFM_OFFSET, - val & ~XTE_AFM_EPPRM_MASK); - temac_indirect_out32(lp, XTE_MAW0_OFFSET, 0); - temac_indirect_out32(lp, XTE_MAW1_OFFSET, 0); - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } - mutex_unlock(lp->indirect_mutex); + + /* Clear all or remaining/unused address table entries */ + while (i < MULTICAST_CAM_TABLE_NUM) { + temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, i << 16); + i++; + } + + /* Enable address filter block if currently disabled */ + if (temac_indirect_in32_locked(lp, XTE_AFM_OFFSET) + & XTE_AFM_EPPRM_MASK) { + temac_indirect_out32_locked(lp, XTE_AFM_OFFSET, 0); + promisc_mode_disabled = true; + } + + spin_unlock_irqrestore(lp->indirect_lock, flags); + + if (promisc_mode_disabled) + dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } static struct temac_option { @@ -516,17 +593,19 @@ static u32 temac_setoptions(struct net_device *ndev, u32 options) struct temac_local *lp = netdev_priv(ndev); struct temac_option *tp = &temac_options[0]; int reg; + unsigned long flags; - mutex_lock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); while (tp->opt) { - reg = temac_indirect_in32(lp, tp->reg) & ~tp->m_or; - if (options & tp->opt) + reg = temac_indirect_in32_locked(lp, tp->reg) & ~tp->m_or; + if (options & tp->opt) { reg |= tp->m_or; - temac_indirect_out32(lp, tp->reg, reg); + temac_indirect_out32_locked(lp, tp->reg, reg); + } tp++; } + spin_unlock_irqrestore(lp->indirect_lock, flags); lp->options |= options; - mutex_unlock(lp->indirect_mutex); return 0; } @@ -537,6 +616,7 @@ static void temac_device_reset(struct net_device *ndev) struct temac_local *lp = netdev_priv(ndev); u32 timeout; u32 val; + unsigned long flags; /* Perform a software reset */ @@ -545,7 +625,6 @@ static void temac_device_reset(struct net_device *ndev) dev_dbg(&ndev->dev, "%s()\n", __func__); - mutex_lock(lp->indirect_mutex); /* Reset the receiver and wait for it to finish reset */ temac_indirect_out32(lp, XTE_RXC1_OFFSET, XTE_RXC1_RXRST_MASK); timeout = 1000; @@ -571,8 +650,11 @@ static void temac_device_reset(struct net_device *ndev) } /* Disable the receiver */ - val = temac_indirect_in32(lp, XTE_RXC1_OFFSET); - temac_indirect_out32(lp, XTE_RXC1_OFFSET, val & ~XTE_RXC1_RXEN_MASK); + spin_lock_irqsave(lp->indirect_lock, flags); + val = temac_indirect_in32_locked(lp, XTE_RXC1_OFFSET); + temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, + val & 
~XTE_RXC1_RXEN_MASK); + spin_unlock_irqrestore(lp->indirect_lock, flags); /* Reset Local Link (DMA) */ lp->dma_out(lp, DMA_CONTROL_REG, DMA_CONTROL_RST); @@ -592,12 +674,12 @@ static void temac_device_reset(struct net_device *ndev) "temac_device_reset descriptor allocation failed\n"); } - temac_indirect_out32(lp, XTE_RXC0_OFFSET, 0); - temac_indirect_out32(lp, XTE_RXC1_OFFSET, 0); - temac_indirect_out32(lp, XTE_TXC_OFFSET, 0); - temac_indirect_out32(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK); - - mutex_unlock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_RXC0_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_TXC_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK); + spin_unlock_irqrestore(lp->indirect_lock, flags); /* Sync default options with HW * but leave receiver and transmitter disabled. */ @@ -621,13 +703,14 @@ static void temac_adjust_link(struct net_device *ndev) struct phy_device *phy = ndev->phydev; u32 mii_speed; int link_state; + unsigned long flags; /* hash together the state values to decide if something has changed */ link_state = phy->speed | (phy->duplex << 1) | phy->link; - mutex_lock(lp->indirect_mutex); if (lp->last_link != link_state) { - mii_speed = temac_indirect_in32(lp, XTE_EMCFG_OFFSET); + spin_lock_irqsave(lp->indirect_lock, flags); + mii_speed = temac_indirect_in32_locked(lp, XTE_EMCFG_OFFSET); mii_speed &= ~XTE_EMCFG_LINKSPD_MASK; switch (phy->speed) { @@ -637,11 +720,12 @@ static void temac_adjust_link(struct net_device *ndev) } /* Write new speed setting out to TEMAC */ - temac_indirect_out32(lp, XTE_EMCFG_OFFSET, mii_speed); + temac_indirect_out32_locked(lp, XTE_EMCFG_OFFSET, mii_speed); + spin_unlock_irqrestore(lp->indirect_lock, flags); + lp->last_link = link_state; phy_print_status(phy); } - mutex_unlock(lp->indirect_mutex); } #ifdef CONFIG_64BIT @@ -1011,6 +1095,7 @@ static const struct net_device_ops temac_netdev_ops = { .ndo_open = temac_open, .ndo_stop = temac_stop, .ndo_start_xmit = temac_start_xmit, + .ndo_set_rx_mode = temac_set_multicast_list, .ndo_set_mac_address = temac_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = temac_ioctl, @@ -1076,7 +1161,6 @@ static int temac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); SET_NETDEV_DEV(ndev, &pdev->dev); - ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ ndev->features = NETIF_F_SG; ndev->netdev_ops = &temac_netdev_ops; ndev->ethtool_ops = &temac_ethtool_ops; @@ -1103,17 +1187,17 @@ static int temac_probe(struct platform_device *pdev) /* Setup mutex for synchronization of indirect register access */ if (pdata) { - if (!pdata->indirect_mutex) { + if (!pdata->indirect_lock) { dev_err(&pdev->dev, - "indirect_mutex missing in platform_data\n"); + "indirect_lock missing in platform_data\n"); return -EINVAL; } - lp->indirect_mutex = pdata->indirect_mutex; + lp->indirect_lock = pdata->indirect_lock; } else { - lp->indirect_mutex = devm_kmalloc(&pdev->dev, - sizeof(*lp->indirect_mutex), - GFP_KERNEL); - mutex_init(lp->indirect_mutex); + lp->indirect_lock = devm_kmalloc(&pdev->dev, + sizeof(*lp->indirect_lock), + GFP_KERNEL); + spin_lock_init(lp->indirect_lock); } /* map device registers */ diff --git a/drivers/net/ethernet/xilinx/ll_temac_mdio.c b/drivers/net/ethernet/xilinx/ll_temac_mdio.c index a4667326f745..6fd2dea4e60f 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_mdio.c +++ 
b/drivers/net/ethernet/xilinx/ll_temac_mdio.c @@ -25,14 +25,15 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg) { struct temac_local *lp = bus->priv; u32 rc; + unsigned long flags; /* Write the PHY address to the MIIM Access Initiator register. * When the transfer completes, the PHY register value will appear * in the LSW0 register */ - mutex_lock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); temac_iow(lp, XTE_LSW0_OFFSET, (phy_id << 5) | reg); - rc = temac_indirect_in32(lp, XTE_MIIMAI_OFFSET); - mutex_unlock(lp->indirect_mutex); + rc = temac_indirect_in32_locked(lp, XTE_MIIMAI_OFFSET); + spin_unlock_irqrestore(lp->indirect_lock, flags); dev_dbg(lp->dev, "temac_mdio_read(phy_id=%i, reg=%x) == %x\n", phy_id, reg, rc); @@ -43,6 +44,7 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg) static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) { struct temac_local *lp = bus->priv; + unsigned long flags; dev_dbg(lp->dev, "temac_mdio_write(phy_id=%i, reg=%x, val=%x)\n", phy_id, reg, val); @@ -50,10 +52,10 @@ static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) /* First write the desired value into the write data register * and then write the address into the access initiator register */ - mutex_lock(lp->indirect_mutex); - temac_indirect_out32(lp, XTE_MGTDR_OFFSET, val); - temac_indirect_out32(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg); - mutex_unlock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_MGTDR_OFFSET, val); + temac_indirect_out32_locked(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg); + spin_unlock_irqrestore(lp->indirect_lock, flags); return 0; } @@ -87,9 +89,7 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev) /* Enable the MDIO bus by asserting the enable bit and writing * in the clock config */ - mutex_lock(lp->indirect_mutex); temac_indirect_out32(lp, XTE_MC_OFFSET, 1 << 6 | clk_div); - mutex_unlock(lp->indirect_mutex); bus = devm_mdiobus_alloc(&pdev->dev); if (!bus) @@ -116,10 +116,8 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev) if (rc) return rc; - mutex_lock(lp->indirect_mutex); dev_dbg(lp->dev, "MDIO bus registered; MC:%x\n", temac_indirect_in32(lp, XTE_MC_OFFSET)); - mutex_unlock(lp->indirect_mutex); return 0; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 011adae32b89..2dacfc85b3ba 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -13,6 +13,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/if_vlan.h> +#include <linux/phylink.h> /* Packet size info */ #define XAE_HDR_SIZE 14 /* Size of Ethernet header */ @@ -83,6 +84,8 @@ #define XAXIDMA_CR_RUNSTOP_MASK 0x00000001 /* Start/stop DMA channel */ #define XAXIDMA_CR_RESET_MASK 0x00000004 /* Reset DMA engine */ +#define XAXIDMA_SR_HALT_MASK 0x00000001 /* Indicates DMA channel halted */ + #define XAXIDMA_BD_NDESC_OFFSET 0x00 /* Next descriptor pointer */ #define XAXIDMA_BD_BUFA_OFFSET 0x08 /* Buffer address */ #define XAXIDMA_BD_CTRL_LEN_OFFSET 0x18 /* Control/buffer length */ @@ -356,9 +359,6 @@ * @app2: MM2S/S2MM User Application Field 2. * @app3: MM2S/S2MM User Application Field 3. * @app4: MM2S/S2MM User Application Field 4. 
- * @sw_id_offset: MM2S/S2MM Sw ID - * @reserved5: Reserved and not used - * @reserved6: Reserved and not used */ struct axidma_bd { u32 next; /* Physical address of next buffer descriptor */ @@ -373,11 +373,9 @@ struct axidma_bd { u32 app1; /* TX start << 16 | insert */ u32 app2; /* TX csum seed */ u32 app3; - u32 app4; - u32 sw_id_offset; - u32 reserved5; - u32 reserved6; -}; + u32 app4; /* Last field used by HW */ + struct sk_buff *skb; +} __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT); /** * struct axienet_local - axienet private per device data @@ -385,6 +383,7 @@ struct axidma_bd { * @dev: Pointer to device structure * @phy_node: Pointer to device node structure * @mii_bus: Pointer to MII bus structure + * @regs_start: Resource start for axienet device addresses * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space * @dma_err_tasklet: Tasklet structure to process Axi DMA errors @@ -422,10 +421,17 @@ struct axienet_local { /* Connection to PHY device */ struct device_node *phy_node; + struct phylink *phylink; + struct phylink_config phylink_config; + + /* Clock for AXI bus */ + struct clk *clk; + /* MDIO bus data */ struct mii_bus *mii_bus; /* MII bus reference */ /* IO registers, dma functions and IRQs */ + resource_size_t regs_start; void __iomem *regs; void __iomem *dma_regs; @@ -433,17 +439,19 @@ struct axienet_local { int tx_irq; int rx_irq; + int eth_irq; phy_interface_t phy_mode; u32 options; /* Current options word */ - u32 last_link; u32 features; /* Buffer descriptors */ struct axidma_bd *tx_bd_v; dma_addr_t tx_bd_p; + u32 tx_bd_num; struct axidma_bd *rx_bd_v; dma_addr_t rx_bd_p; + u32 rx_bd_num; u32 tx_bd_ci; u32 tx_bd_tail; u32 rx_bd_ci; @@ -481,7 +489,7 @@ struct axienet_option { */ static inline u32 axienet_ior(struct axienet_local *lp, off_t offset) { - return in_be32(lp->regs + offset); + return ioread32(lp->regs + offset); } static inline u32 axinet_ior_read_mcr(struct axienet_local *lp) @@ -501,12 +509,13 @@ static inline u32 axinet_ior_read_mcr(struct axienet_local *lp) static inline void axienet_iow(struct axienet_local *lp, off_t offset, u32 value) { - out_be32((lp->regs + offset), value); + iowrite32(value, lp->regs + offset); } /* Function prototypes visible in xilinx_axienet_mdio.c for other files */ -int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np); -int axienet_mdio_wait_until_ready(struct axienet_local *lp); +int axienet_mdio_enable(struct axienet_local *lp); +void axienet_mdio_disable(struct axienet_local *lp); +int axienet_mdio_setup(struct axienet_local *lp); void axienet_mdio_teardown(struct axienet_local *lp); #endif /* XILINX_AXI_ENET_H */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 831967f6eff8..4fc627fb4d11 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -7,6 +7,7 @@ * Copyright (c) 2008-2009 Secret Lab Technologies Ltd. * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu> * Copyright (c) 2010 - 2011 PetaLogix + * Copyright (c) 2019 SED Systems, a division of Calian Ltd. * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved. * * This is a driver for the Xilinx Axi Ethernet which is used in the Virtex6 @@ -21,6 +22,7 @@ * - Add support for extended VLAN support. 
*/ +#include <linux/clk.h> #include <linux/delay.h> #include <linux/etherdevice.h> #include <linux/module.h> @@ -38,16 +40,18 @@ #include "xilinx_axienet.h" -/* Descriptors defines for Tx and Rx DMA - 2^n for the best performance */ -#define TX_BD_NUM 64 -#define RX_BD_NUM 128 +/* Descriptors defines for Tx and Rx DMA */ +#define TX_BD_NUM_DEFAULT 64 +#define RX_BD_NUM_DEFAULT 1024 +#define TX_BD_NUM_MAX 4096 +#define RX_BD_NUM_MAX 4096 /* Must be shorter than length of ethtool_drvinfo.driver field to fit */ #define DRIVER_NAME "xaxienet" #define DRIVER_DESCRIPTION "Xilinx Axi Ethernet driver" #define DRIVER_VERSION "1.00a" -#define AXIENET_REGS_N 32 +#define AXIENET_REGS_N 40 /* Match table for of_platform binding */ static const struct of_device_id axienet_of_match[] = { @@ -125,7 +129,7 @@ static struct axienet_option axienet_options[] = { */ static inline u32 axienet_dma_in32(struct axienet_local *lp, off_t reg) { - return in_be32(lp->dma_regs + reg); + return ioread32(lp->dma_regs + reg); } /** @@ -140,7 +144,7 @@ static inline u32 axienet_dma_in32(struct axienet_local *lp, off_t reg) static inline void axienet_dma_out32(struct axienet_local *lp, off_t reg, u32 value) { - out_be32((lp->dma_regs + reg), value); + iowrite32(value, lp->dma_regs + reg); } /** @@ -156,22 +160,21 @@ static void axienet_dma_bd_release(struct net_device *ndev) int i; struct axienet_local *lp = netdev_priv(ndev); - for (i = 0; i < RX_BD_NUM; i++) { + for (i = 0; i < lp->rx_bd_num; i++) { dma_unmap_single(ndev->dev.parent, lp->rx_bd_v[i].phys, lp->max_frm_size, DMA_FROM_DEVICE); - dev_kfree_skb((struct sk_buff *) - (lp->rx_bd_v[i].sw_id_offset)); + dev_kfree_skb(lp->rx_bd_v[i].skb); } if (lp->rx_bd_v) { dma_free_coherent(ndev->dev.parent, - sizeof(*lp->rx_bd_v) * RX_BD_NUM, + sizeof(*lp->rx_bd_v) * lp->rx_bd_num, lp->rx_bd_v, lp->rx_bd_p); } if (lp->tx_bd_v) { dma_free_coherent(ndev->dev.parent, - sizeof(*lp->tx_bd_v) * TX_BD_NUM, + sizeof(*lp->tx_bd_v) * lp->tx_bd_num, lp->tx_bd_v, lp->tx_bd_p); } @@ -201,33 +204,33 @@ static int axienet_dma_bd_init(struct net_device *ndev) /* Allocate the Tx and Rx buffer descriptors. */ lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->tx_bd_v) * TX_BD_NUM, + sizeof(*lp->tx_bd_v) * lp->tx_bd_num, &lp->tx_bd_p, GFP_KERNEL); if (!lp->tx_bd_v) goto out; lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->rx_bd_v) * RX_BD_NUM, + sizeof(*lp->rx_bd_v) * lp->rx_bd_num, &lp->rx_bd_p, GFP_KERNEL); if (!lp->rx_bd_v) goto out; - for (i = 0; i < TX_BD_NUM; i++) { + for (i = 0; i < lp->tx_bd_num; i++) { lp->tx_bd_v[i].next = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * - ((i + 1) % TX_BD_NUM); + ((i + 1) % lp->tx_bd_num); } - for (i = 0; i < RX_BD_NUM; i++) { + for (i = 0; i < lp->rx_bd_num; i++) { lp->rx_bd_v[i].next = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * - ((i + 1) % RX_BD_NUM); + ((i + 1) % lp->rx_bd_num); skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); if (!skb) goto out; - lp->rx_bd_v[i].sw_id_offset = (u32) skb; + lp->rx_bd_v[i].skb = skb; lp->rx_bd_v[i].phys = dma_map_single(ndev->dev.parent, skb->data, lp->max_frm_size, @@ -269,7 +272,7 @@ static int axienet_dma_bd_init(struct net_device *ndev) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr | XAXIDMA_CR_RUNSTOP_MASK); axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + - (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1))); + (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. 
But only after we write to the @@ -434,17 +437,20 @@ static void axienet_setoptions(struct net_device *ndev, u32 options) lp->options |= options; } -static void __axienet_device_reset(struct axienet_local *lp, off_t offset) +static void __axienet_device_reset(struct axienet_local *lp) { u32 timeout; /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset * process of Axi DMA takes a while to complete as all pending * commands/transfers will be flushed or completed during this * reset process. + * Note that even though both TX and RX have their own reset register, + * they both reset the entire DMA core, so only one needs to be used. */ - axienet_dma_out32(lp, offset, XAXIDMA_CR_RESET_MASK); + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK); timeout = DELAY_OF_ONE_MILLISEC; - while (axienet_dma_in32(lp, offset) & XAXIDMA_CR_RESET_MASK) { + while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) & + XAXIDMA_CR_RESET_MASK) { udelay(1); if (--timeout == 0) { netdev_err(lp->ndev, "%s: DMA reset timeout!\n", @@ -470,8 +476,7 @@ static void axienet_device_reset(struct net_device *ndev) u32 axienet_status; struct axienet_local *lp = netdev_priv(ndev); - __axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET); - __axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET); + __axienet_device_reset(lp); lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE; lp->options |= XAE_OPTION_VLAN; @@ -498,6 +503,8 @@ static void axienet_device_reset(struct net_device *ndev) axienet_status = axienet_ior(lp, XAE_IP_OFFSET); if (axienet_status & XAE_INT_RXRJECT_MASK) axienet_iow(lp, XAE_IS_OFFSET, XAE_INT_RXRJECT_MASK); + axienet_iow(lp, XAE_IE_OFFSET, lp->eth_irq > 0 ? + XAE_INT_RECV_ERROR_MASK : 0); axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK); @@ -514,63 +521,6 @@ static void axienet_device_reset(struct net_device *ndev) } /** - * axienet_adjust_link - Adjust the PHY link speed/duplex. - * @ndev: Pointer to the net_device structure - * - * This function is called to change the speed and duplex setting after - * auto negotiation is done by the PHY. This is the function that gets - * registered with the PHY interface through the "of_phy_connect" call. - */ -static void axienet_adjust_link(struct net_device *ndev) -{ - u32 emmc_reg; - u32 link_state; - u32 setspeed = 1; - struct axienet_local *lp = netdev_priv(ndev); - struct phy_device *phy = ndev->phydev; - - link_state = phy->speed | (phy->duplex << 1) | phy->link; - if (lp->last_link != link_state) { - if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) { - if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) - setspeed = 0; - } else { - if ((phy->speed == SPEED_1000) && - (lp->phy_mode == PHY_INTERFACE_MODE_MII)) - setspeed = 0; - } - - if (setspeed == 1) { - emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET); - emmc_reg &= ~XAE_EMMC_LINKSPEED_MASK; - - switch (phy->speed) { - case SPEED_1000: - emmc_reg |= XAE_EMMC_LINKSPD_1000; - break; - case SPEED_100: - emmc_reg |= XAE_EMMC_LINKSPD_100; - break; - case SPEED_10: - emmc_reg |= XAE_EMMC_LINKSPD_10; - break; - default: - dev_err(&ndev->dev, "Speed other than 10, 100 " - "or 1Gbps is not supported\n"); - break; - } - - axienet_iow(lp, XAE_EMMC_OFFSET, emmc_reg); - lp->last_link = link_state; - phy_print_status(phy); - } else { - netdev_err(ndev, - "Error setting Axi Ethernet mac speed\n"); - } - } -} - -/** * axienet_start_xmit_done - Invoked once a transmit is completed by the * Axi DMA Tx channel. 
* @ndev: Pointer to the net_device structure @@ -595,26 +545,31 @@ static void axienet_start_xmit_done(struct net_device *ndev) dma_unmap_single(ndev->dev.parent, cur_p->phys, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); - if (cur_p->app4) - dev_consume_skb_irq((struct sk_buff *)cur_p->app4); + if (cur_p->skb) + dev_consume_skb_irq(cur_p->skb); /*cur_p->phys = 0;*/ cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; cur_p->app4 = 0; cur_p->status = 0; + cur_p->skb = NULL; size += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; packets++; - ++lp->tx_bd_ci; - lp->tx_bd_ci %= TX_BD_NUM; + if (++lp->tx_bd_ci >= lp->tx_bd_num) + lp->tx_bd_ci = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_ci]; status = cur_p->status; } ndev->stats.tx_packets += packets; ndev->stats.tx_bytes += size; + + /* Matches barrier in axienet_start_xmit */ + smp_mb(); + netif_wake_queue(ndev); } @@ -635,7 +590,7 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, int num_frag) { struct axidma_bd *cur_p; - cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % TX_BD_NUM]; + cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK) return NETDEV_TX_BUSY; return 0; @@ -670,9 +625,19 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (axienet_check_tx_bd_space(lp, num_frag)) { - if (!netif_queue_stopped(ndev)) - netif_stop_queue(ndev); - return NETDEV_TX_BUSY; + if (netif_queue_stopped(ndev)) + return NETDEV_TX_BUSY; + + netif_stop_queue(ndev); + + /* Matches barrier in axienet_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (axienet_check_tx_bd_space(lp, num_frag)) + return NETDEV_TX_BUSY; + + netif_wake_queue(ndev); } if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -695,8 +660,8 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_headlen(skb), DMA_TO_DEVICE); for (ii = 0; ii < num_frag; ii++) { - ++lp->tx_bd_tail; - lp->tx_bd_tail %= TX_BD_NUM; + if (++lp->tx_bd_tail >= lp->tx_bd_num) + lp->tx_bd_tail = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; frag = &skb_shinfo(skb)->frags[ii]; cur_p->phys = dma_map_single(ndev->dev.parent, @@ -707,13 +672,13 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } cur_p->cntrl |= XAXIDMA_BD_CTRL_TXEOF_MASK; - cur_p->app4 = (unsigned long)skb; + cur_p->skb = skb; tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail; /* Start the transfer */ axienet_dma_out32(lp, XAXIDMA_TX_TDESC_OFFSET, tail_p); - ++lp->tx_bd_tail; - lp->tx_bd_tail %= TX_BD_NUM; + if (++lp->tx_bd_tail >= lp->tx_bd_num) + lp->tx_bd_tail = 0; return NETDEV_TX_OK; } @@ -742,13 +707,15 @@ static void axienet_recv(struct net_device *ndev) while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - skb = (struct sk_buff *) (cur_p->sw_id_offset); - length = cur_p->app4 & 0x0000FFFF; dma_unmap_single(ndev->dev.parent, cur_p->phys, lp->max_frm_size, DMA_FROM_DEVICE); + skb = cur_p->skb; + cur_p->skb = NULL; + length = cur_p->app4 & 0x0000FFFF; + skb_put(skb, length); skb->protocol = eth_type_trans(skb, ndev); /*skb_checksum_none_assert(skb);*/ @@ -783,10 +750,10 @@ static void axienet_recv(struct net_device *ndev) DMA_FROM_DEVICE); cur_p->cntrl = lp->max_frm_size; cur_p->status = 0; - cur_p->sw_id_offset = (u32) new_skb; + cur_p->skb = new_skb; - ++lp->rx_bd_ci; - lp->rx_bd_ci %= RX_BD_NUM; + if (++lp->rx_bd_ci >= lp->rx_bd_num) + lp->rx_bd_ci = 0; cur_p = 
&lp->rx_bd_v[lp->rx_bd_ci]; } @@ -802,7 +769,7 @@ static void axienet_recv(struct net_device *ndev) * @irq: irq number * @_ndev: net_device pointer * - * Return: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED if device generated a TX interrupt, IRQ_NONE otherwise. * * This is the Axi DMA Tx done Isr. It invokes "axienet_start_xmit_done" * to complete the BD processing. @@ -821,7 +788,7 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) goto out; } if (!(status & XAXIDMA_IRQ_ALL_MASK)) - dev_err(&ndev->dev, "No interrupts asserted in Tx path\n"); + return IRQ_NONE; if (status & XAXIDMA_IRQ_ERROR_MASK) { dev_err(&ndev->dev, "DMA Tx error 0x%x\n", status); dev_err(&ndev->dev, "Current BD is at: 0x%x\n", @@ -851,7 +818,7 @@ out: * @irq: irq number * @_ndev: net_device pointer * - * Return: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED if device generated a RX interrupt, IRQ_NONE otherwise. * * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD * processing. @@ -870,7 +837,7 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) goto out; } if (!(status & XAXIDMA_IRQ_ALL_MASK)) - dev_err(&ndev->dev, "No interrupts asserted in Rx path\n"); + return IRQ_NONE; if (status & XAXIDMA_IRQ_ERROR_MASK) { dev_err(&ndev->dev, "DMA Rx error 0x%x\n", status); dev_err(&ndev->dev, "Current BD is at: 0x%x\n", @@ -895,6 +862,35 @@ out: return IRQ_HANDLED; } +/** + * axienet_eth_irq - Ethernet core Isr. + * @irq: irq number + * @_ndev: net_device pointer + * + * Return: IRQ_HANDLED if device generated a core interrupt, IRQ_NONE otherwise. + * + * Handle miscellaneous conditions indicated by Ethernet core IRQ. + */ +static irqreturn_t axienet_eth_irq(int irq, void *_ndev) +{ + struct net_device *ndev = _ndev; + struct axienet_local *lp = netdev_priv(ndev); + unsigned int pending; + + pending = axienet_ior(lp, XAE_IP_OFFSET); + if (!pending) + return IRQ_NONE; + + if (pending & XAE_INT_RXFIFOOVR_MASK) + ndev->stats.rx_missed_errors++; + + if (pending & XAE_INT_RXRJECT_MASK) + ndev->stats.rx_frame_errors++; + + axienet_iow(lp, XAE_IS_OFFSET, pending); + return IRQ_HANDLED; +} + static void axienet_dma_err_handler(unsigned long data); /** @@ -904,67 +900,72 @@ static void axienet_dma_err_handler(unsigned long data); * Return: 0, on success. * non-zero error value on failure * - * This is the driver open routine. It calls phy_start to start the PHY device. + * This is the driver open routine. It calls phylink_start to start the + * PHY device. * It also allocates interrupt service routines, enables the interrupt lines * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer * descriptors are initialized. */ static int axienet_open(struct net_device *ndev) { - int ret, mdio_mcreg; + int ret; struct axienet_local *lp = netdev_priv(ndev); - struct phy_device *phydev = NULL; dev_dbg(&ndev->dev, "axienet_open()\n"); - mdio_mcreg = axienet_ior(lp, XAE_MDIO_MC_OFFSET); - ret = axienet_mdio_wait_until_ready(lp); - if (ret < 0) - return ret; /* Disable the MDIO interface till Axi Ethernet Reset is completed. * When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. If MDIO is not disabled when the reset - * process is started, MDIO will be broken afterwards. + * including the MDIO. MDIO must be disabled before resetting + * and re-enabled afterwards. + * Hold MDIO bus lock to avoid MDIO accesses during the reset. 
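+ * axienet_mdio_disable()/axienet_mdio_enable() replace the open-coded
+ * save and restore of the MDIO MC register that was used here before.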
*/ - axienet_iow(lp, XAE_MDIO_MC_OFFSET, - (mdio_mcreg & (~XAE_MDIO_MC_MDIOEN_MASK))); + mutex_lock(&lp->mii_bus->mdio_lock); + axienet_mdio_disable(lp); axienet_device_reset(ndev); - /* Enable the MDIO */ - axienet_iow(lp, XAE_MDIO_MC_OFFSET, mdio_mcreg); - ret = axienet_mdio_wait_until_ready(lp); + ret = axienet_mdio_enable(lp); + mutex_unlock(&lp->mii_bus->mdio_lock); if (ret < 0) return ret; - if (lp->phy_node) { - phydev = of_phy_connect(lp->ndev, lp->phy_node, - axienet_adjust_link, 0, lp->phy_mode); - - if (!phydev) - dev_err(lp->dev, "of_phy_connect() failed\n"); - else - phy_start(phydev); + ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); + if (ret) { + dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret); + return ret; } + phylink_start(lp->phylink); + /* Enable tasklets for Axi DMA error handling */ tasklet_init(&lp->dma_err_tasklet, axienet_dma_err_handler, (unsigned long) lp); /* Enable interrupts for Axi DMA Tx */ - ret = request_irq(lp->tx_irq, axienet_tx_irq, 0, ndev->name, ndev); + ret = request_irq(lp->tx_irq, axienet_tx_irq, IRQF_SHARED, + ndev->name, ndev); if (ret) goto err_tx_irq; /* Enable interrupts for Axi DMA Rx */ - ret = request_irq(lp->rx_irq, axienet_rx_irq, 0, ndev->name, ndev); + ret = request_irq(lp->rx_irq, axienet_rx_irq, IRQF_SHARED, + ndev->name, ndev); if (ret) goto err_rx_irq; + /* Enable interrupts for Axi Ethernet core (if defined) */ + if (lp->eth_irq > 0) { + ret = request_irq(lp->eth_irq, axienet_eth_irq, IRQF_SHARED, + ndev->name, ndev); + if (ret) + goto err_eth_irq; + } return 0; +err_eth_irq: + free_irq(lp->rx_irq, ndev); err_rx_irq: free_irq(lp->tx_irq, ndev); err_tx_irq: - if (phydev) - phy_disconnect(phydev); + phylink_stop(lp->phylink); + phylink_disconnect_phy(lp->phylink); tasklet_kill(&lp->dma_err_tasklet); dev_err(lp->dev, "request_irq() failed\n"); return ret; @@ -976,34 +977,61 @@ err_tx_irq: * * Return: 0, on success. * - * This is the driver stop routine. It calls phy_disconnect to stop the PHY + * This is the driver stop routine. It calls phylink_disconnect to stop the PHY * device. It also removes the interrupt handlers and disables the interrupts. * The Axi DMA Tx/Rx BDs are released. 
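+ * The DMA channels are also polled briefly until they halt, and the
+ * core is then reset (with MDIO disabled meanwhile) to make sure DMA
+ * has really stopped.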
*/ static int axienet_stop(struct net_device *ndev) { - u32 cr; + u32 cr, sr; + int count; struct axienet_local *lp = netdev_priv(ndev); dev_dbg(&ndev->dev, "axienet_close()\n"); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, - cr & (~XAXIDMA_CR_RUNSTOP_MASK)); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, - cr & (~XAXIDMA_CR_RUNSTOP_MASK)); + phylink_stop(lp->phylink); + phylink_disconnect_phy(lp->phylink); + axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); + cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + + cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + + axienet_iow(lp, XAE_IE_OFFSET, 0); + + /* Give DMAs a chance to halt gracefully */ + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + } + + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + } + + /* Do a reset to ensure DMA is really stopped */ + mutex_lock(&lp->mii_bus->mdio_lock); + axienet_mdio_disable(lp); + __axienet_device_reset(lp); + axienet_mdio_enable(lp); + mutex_unlock(&lp->mii_bus->mdio_lock); + tasklet_kill(&lp->dma_err_tasklet); + if (lp->eth_irq > 0) + free_irq(lp->eth_irq, ndev); free_irq(lp->tx_irq, ndev); free_irq(lp->rx_irq, ndev); - if (ndev->phydev) - phy_disconnect(ndev->phydev); - axienet_dma_bd_release(ndev); return 0; } @@ -1151,6 +1179,48 @@ static void axienet_ethtools_get_regs(struct net_device *ndev, data[29] = axienet_ior(lp, XAE_FMI_OFFSET); data[30] = axienet_ior(lp, XAE_AF0_OFFSET); data[31] = axienet_ior(lp, XAE_AF1_OFFSET); + data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET); + data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET); + data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET); + data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET); +} + +static void axienet_ethtools_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ering) +{ + struct axienet_local *lp = netdev_priv(ndev); + + ering->rx_max_pending = RX_BD_NUM_MAX; + ering->rx_mini_max_pending = 0; + ering->rx_jumbo_max_pending = 0; + ering->tx_max_pending = TX_BD_NUM_MAX; + ering->rx_pending = lp->rx_bd_num; + ering->rx_mini_pending = 0; + ering->rx_jumbo_pending = 0; + ering->tx_pending = lp->tx_bd_num; +} + +static int axienet_ethtools_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ering) +{ + struct axienet_local *lp = netdev_priv(ndev); + + if (ering->rx_pending > RX_BD_NUM_MAX || + ering->rx_mini_pending || + ering->rx_jumbo_pending || + ering->tx_pending > TX_BD_NUM_MAX) + return -EINVAL; + + if (netif_running(ndev)) + return -EBUSY; + + lp->rx_bd_num = ering->rx_pending; + lp->tx_bd_num = ering->tx_pending; + return 0; } /** @@ -1166,12 +1236,9 @@ static void axienet_ethtools_get_pauseparam(struct net_device *ndev, struct 
 
 /**
@@ -1166,12 +1236,9 @@ static void axienet_ethtools_get_pauseparam(struct net_device *ndev,
 					    struct ethtool_pauseparam *epauseparm)
 {
-	u32 regval;
 	struct axienet_local *lp = netdev_priv(ndev);
-	epauseparm->autoneg  = 0;
-	regval = axienet_ior(lp, XAE_FCC_OFFSET);
-	epauseparm->tx_pause = regval & XAE_FCC_FCTX_MASK;
-	epauseparm->rx_pause = regval & XAE_FCC_FCRX_MASK;
+
+	phylink_ethtool_get_pauseparam(lp->phylink, epauseparm);
 }
 
 /**
@@ -1190,27 +1257,9 @@ static int axienet_ethtools_set_pauseparam(struct net_device *ndev,
 					   struct ethtool_pauseparam *epauseparm)
 {
-	u32 regval = 0;
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	if (netif_running(ndev)) {
-		netdev_err(ndev,
-			   "Please stop netif before applying configuration\n");
-		return -EFAULT;
-	}
-
-	regval = axienet_ior(lp, XAE_FCC_OFFSET);
-	if (epauseparm->tx_pause)
-		regval |= XAE_FCC_FCTX_MASK;
-	else
-		regval &= ~XAE_FCC_FCTX_MASK;
-	if (epauseparm->rx_pause)
-		regval |= XAE_FCC_FCRX_MASK;
-	else
-		regval &= ~XAE_FCC_FCRX_MASK;
-	axienet_iow(lp, XAE_FCC_OFFSET, regval);
-
-	return 0;
+	return phylink_ethtool_set_pauseparam(lp->phylink, epauseparm);
 }
 
 /**
@@ -1289,17 +1338,170 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev,
 	return 0;
 }
 
+static int
+axienet_ethtools_get_link_ksettings(struct net_device *ndev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_get(lp->phylink, cmd);
+}
+
+static int
+axienet_ethtools_set_link_ksettings(struct net_device *ndev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_set(lp->phylink, cmd);
+}
+
 static const struct ethtool_ops axienet_ethtool_ops = {
 	.get_drvinfo    = axienet_ethtools_get_drvinfo,
 	.get_regs_len   = axienet_ethtools_get_regs_len,
 	.get_regs       = axienet_ethtools_get_regs,
 	.get_link       = ethtool_op_get_link,
+	.get_ringparam	= axienet_ethtools_get_ringparam,
+	.set_ringparam	= axienet_ethtools_set_ringparam,
 	.get_pauseparam = axienet_ethtools_get_pauseparam,
 	.set_pauseparam = axienet_ethtools_set_pauseparam,
 	.get_coalesce   = axienet_ethtools_get_coalesce,
 	.set_coalesce   = axienet_ethtools_set_coalesce,
-	.get_link_ksettings = phy_ethtool_get_link_ksettings,
-	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.get_link_ksettings = axienet_ethtools_get_link_ksettings,
+	.set_link_ksettings = axienet_ethtools_set_link_ksettings,
+};
+
+static void axienet_validate(struct phylink_config *config,
+			     unsigned long *supported,
+			     struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	/* Only support the mode we are configured for */
+	if (state->interface != PHY_INTERFACE_MODE_NA &&
+	    state->interface != lp->phy_mode) {
+		netdev_warn(ndev, "Cannot use PHY mode %s, supported: %s\n",
+			    phy_modes(state->interface),
+			    phy_modes(lp->phy_mode));
+		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		return;
+	}
+
+	phylink_set(mask, Autoneg);
+	phylink_set_port_modes(mask);
+
+	phylink_set(mask, Asym_Pause);
+	phylink_set(mask, Pause);
+	phylink_set(mask, 1000baseX_Full);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Full);
+	phylink_set(mask, 1000baseT_Full);
+
+	bitmap_and(supported, supported, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
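validate() is phylink's capability-negotiation hook: the MAC clears everything it cannot do from the supported and advertising masks, and zeroes supported entirely for interface modes it cannot drive. A minimal sketch for a hypothetical MII-only, 10/100 full-duplex MAC (foo_validate() and its mode set are assumptions, not from this patch):

	static void foo_validate(struct phylink_config *config,
				 unsigned long *supported,
				 struct phylink_link_state *state)
	{
		__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };

		/* Reject interface modes the MAC cannot drive at all */
		if (state->interface != PHY_INTERFACE_MODE_NA &&
		    state->interface != PHY_INTERFACE_MODE_MII) {
			bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
			return;
		}

		phylink_set(mask, Autoneg);
		phylink_set(mask, MII);
		phylink_set(mask, 10baseT_Full);
		phylink_set(mask, 100baseT_Full);

		/* Keep only what both the MAC and the caller support */
		bitmap_and(supported, supported, mask,
			   __ETHTOOL_LINK_MODE_MASK_NBITS);
		bitmap_and(state->advertising, state->advertising, mask,
			   __ETHTOOL_LINK_MODE_MASK_NBITS);
	}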
+
+static int axienet_mac_link_state(struct phylink_config *config,
+				  struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	u32 emmc_reg, fcc_reg;
+
+	state->interface = lp->phy_mode;
+
+	emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET);
+	if (emmc_reg & XAE_EMMC_LINKSPD_1000)
+		state->speed = SPEED_1000;
+	else if (emmc_reg & XAE_EMMC_LINKSPD_100)
+		state->speed = SPEED_100;
+	else
+		state->speed = SPEED_10;
+
+	state->pause = 0;
+	fcc_reg = axienet_ior(lp, XAE_FCC_OFFSET);
+	if (fcc_reg & XAE_FCC_FCTX_MASK)
+		state->pause |= MLO_PAUSE_TX;
+	if (fcc_reg & XAE_FCC_FCRX_MASK)
+		state->pause |= MLO_PAUSE_RX;
+
+	state->an_complete = 0;
+	state->duplex = 1;
+
+	return 1;
+}
+
+static void axienet_mac_an_restart(struct phylink_config *config)
+{
+	/* Unsupported, do nothing */
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+			       const struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	u32 emmc_reg, fcc_reg;
+
+	emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET);
+	emmc_reg &= ~XAE_EMMC_LINKSPEED_MASK;
+
+	switch (state->speed) {
+	case SPEED_1000:
+		emmc_reg |= XAE_EMMC_LINKSPD_1000;
+		break;
+	case SPEED_100:
+		emmc_reg |= XAE_EMMC_LINKSPD_100;
+		break;
+	case SPEED_10:
+		emmc_reg |= XAE_EMMC_LINKSPD_10;
+		break;
+	default:
+		dev_err(&ndev->dev,
+			"Speed other than 10, 100 or 1Gbps is not supported\n");
+		break;
+	}
+
+	axienet_iow(lp, XAE_EMMC_OFFSET, emmc_reg);
+
+	fcc_reg = axienet_ior(lp, XAE_FCC_OFFSET);
+	if (state->pause & MLO_PAUSE_TX)
+		fcc_reg |= XAE_FCC_FCTX_MASK;
+	else
+		fcc_reg &= ~XAE_FCC_FCTX_MASK;
+	if (state->pause & MLO_PAUSE_RX)
+		fcc_reg |= XAE_FCC_FCRX_MASK;
+	else
+		fcc_reg &= ~XAE_FCC_FCRX_MASK;
+	axienet_iow(lp, XAE_FCC_OFFSET, fcc_reg);
+}
+
+static void axienet_mac_link_down(struct phylink_config *config,
+				  unsigned int mode,
+				  phy_interface_t interface)
+{
+	/* nothing meaningful to do */
+}
+
+static void axienet_mac_link_up(struct phylink_config *config,
+				unsigned int mode,
+				phy_interface_t interface,
+				struct phy_device *phy)
+{
+	/* nothing meaningful to do */
+}
+
+static const struct phylink_mac_ops axienet_phylink_ops = {
+	.validate = axienet_validate,
+	.mac_link_state = axienet_mac_link_state,
+	.mac_an_restart = axienet_mac_an_restart,
+	.mac_config = axienet_mac_config,
+	.mac_link_down = axienet_mac_link_down,
+	.mac_link_up = axienet_mac_link_up,
 };
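These six callbacks are the whole MAC-side contract with phylink; the PHY state machine and ethtool plumbing stay generic. A sketch of the lifecycle a driver wires up around the ops table (priv/foo_* names are placeholders; error handling trimmed; the calls themselves are the ones this patch uses):

	/* probe: bind the ops table to a phylink instance */
	priv->phylink_config.dev = &ndev->dev;
	priv->phylink_config.type = PHYLINK_NETDEV;
	priv->phylink = phylink_create(&priv->phylink_config,
				       dev->fwnode, priv->phy_mode,
				       &axienet_phylink_ops);

	/* ndo_open: attach the PHY and start the state machine */
	phylink_of_phy_connect(priv->phylink, dev->of_node, 0);
	phylink_start(priv->phylink);

	/* ndo_stop: reverse order */
	phylink_stop(priv->phylink);
	phylink_disconnect_phy(priv->phylink);

	/* remove */
	phylink_destroy(priv->phylink);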
 
 /**
@@ -1313,38 +1515,33 @@ static void axienet_dma_err_handler(unsigned long data)
 {
 	u32 axienet_status;
 	u32 cr, i;
-	int mdio_mcreg;
 	struct axienet_local *lp = (struct axienet_local *) data;
 	struct net_device *ndev = lp->ndev;
 	struct axidma_bd *cur_p;
 
 	axienet_setoptions(ndev, lp->options &
 			   ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
-	mdio_mcreg = axienet_ior(lp, XAE_MDIO_MC_OFFSET);
-	axienet_mdio_wait_until_ready(lp);
 	/* Disable the MDIO interface till Axi Ethernet Reset is completed.
 	 * When we do an Axi Ethernet reset, it resets the complete core
-	 * including the MDIO. So if MDIO is not disabled when the reset
-	 * process is started, MDIO will be broken afterwards.
+	 * including the MDIO. MDIO must be disabled before resetting
+	 * and re-enabled afterwards.
+	 * Hold MDIO bus lock to avoid MDIO accesses during the reset.
 	 */
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg &
-		    ~XAE_MDIO_MC_MDIOEN_MASK));
+	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_mdio_disable(lp);
+	__axienet_device_reset(lp);
+	axienet_mdio_enable(lp);
+	mutex_unlock(&lp->mii_bus->mdio_lock);
 
-	__axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET);
-	__axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET);
-
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET, mdio_mcreg);
-	axienet_mdio_wait_until_ready(lp);
-
-	for (i = 0; i < TX_BD_NUM; i++) {
+	for (i = 0; i < lp->tx_bd_num; i++) {
 		cur_p = &lp->tx_bd_v[i];
 		if (cur_p->phys)
 			dma_unmap_single(ndev->dev.parent, cur_p->phys,
 					 (cur_p->cntrl &
 					  XAXIDMA_BD_CTRL_LENGTH_MASK),
 					 DMA_TO_DEVICE);
-		if (cur_p->app4)
-			dev_kfree_skb_irq((struct sk_buff *) cur_p->app4);
+		if (cur_p->skb)
+			dev_kfree_skb_irq(cur_p->skb);
 		cur_p->phys = 0;
 		cur_p->cntrl = 0;
 		cur_p->status = 0;
@@ -1353,10 +1550,10 @@ static void axienet_dma_err_handler(unsigned long data)
 		cur_p->app2 = 0;
 		cur_p->app3 = 0;
 		cur_p->app4 = 0;
-		cur_p->sw_id_offset = 0;
+		cur_p->skb = NULL;
 	}
 
-	for (i = 0; i < RX_BD_NUM; i++) {
+	for (i = 0; i < lp->rx_bd_num; i++) {
 		cur_p = &lp->rx_bd_v[i];
 		cur_p->status = 0;
 		cur_p->app0 = 0;
@@ -1404,7 +1601,7 @@ static void axienet_dma_err_handler(unsigned long data)
 	axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET,
 			  cr | XAXIDMA_CR_RUNSTOP_MASK);
 	axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
-			  (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+			  (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
 
 	/* Write to the RS (Run-stop) bit in the Tx channel control register.
 	 * Tx channel is now ready to run. But only after we write to the
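The error handler can now free skbs through a dedicated per-descriptor pointer instead of a cast of the app4 word, which also keeps the pointer out of the words hardware may touch. A trimmed sketch of the descriptor as this series shapes it (reserved padding words omitted; treat the exact layout as an assumption):

	struct axidma_bd {
		u32 next;		/* physical address of the next BD */
		u32 phys;		/* DMA address of the data buffer */
		u32 cntrl;		/* buffer length and SOF/EOF flags */
		u32 status;		/* completion status from hardware */
		u32 app0, app1, app2, app3, app4;
		struct sk_buff *skb;	/* driver-private; never read by HW */
	};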
@@ -1422,6 +1619,8 @@ static void axienet_dma_err_handler(unsigned long data)
 	axienet_status = axienet_ior(lp, XAE_IP_OFFSET);
 	if (axienet_status & XAE_INT_RXRJECT_MASK)
 		axienet_iow(lp, XAE_IS_OFFSET, XAE_INT_RXRJECT_MASK);
+	axienet_iow(lp, XAE_IE_OFFSET, lp->eth_irq > 0 ?
+		    XAE_INT_RECV_ERROR_MASK : 0);
 	axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK);
 
 	/* Sync default options with HW but leave receiver and
@@ -1453,7 +1652,7 @@ static int axienet_probe(struct platform_device *pdev)
 	struct axienet_local *lp;
 	struct net_device *ndev;
 	const void *mac_addr;
-	struct resource *ethres, dmares;
+	struct resource *ethres;
 	u32 value;
 
 	ndev = alloc_etherdev(sizeof(*lp));
@@ -1476,6 +1675,8 @@ static int axienet_probe(struct platform_device *pdev)
 	lp->ndev = ndev;
 	lp->dev = &pdev->dev;
 	lp->options = XAE_OPTION_DEFAULTS;
+	lp->rx_bd_num = RX_BD_NUM_DEFAULT;
+	lp->tx_bd_num = TX_BD_NUM_DEFAULT;
 	/* Map device registers */
 	ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
@@ -1484,6 +1685,7 @@ static int axienet_probe(struct platform_device *pdev)
 		ret = PTR_ERR(lp->regs);
 		goto free_netdev;
 	}
+	lp->regs_start = ethres->start;
 
 	/* Setup checksum offload, but default to off if not specified */
 	lp->features = 0;
@@ -1568,38 +1770,56 @@ static int axienet_probe(struct platform_device *pdev)
 
 	/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
 	np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
-	if (!np) {
-		dev_err(&pdev->dev, "could not find DMA node\n");
-		ret = -ENODEV;
-		goto free_netdev;
-	}
-	ret = of_address_to_resource(np, 0, &dmares);
-	if (ret) {
-		dev_err(&pdev->dev, "unable to get DMA resource\n");
+	if (np) {
+		struct resource dmares;
+
+		ret = of_address_to_resource(np, 0, &dmares);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"unable to get DMA resource\n");
+			of_node_put(np);
+			goto free_netdev;
+		}
+		lp->dma_regs = devm_ioremap_resource(&pdev->dev,
+						     &dmares);
+		lp->rx_irq = irq_of_parse_and_map(np, 1);
+		lp->tx_irq = irq_of_parse_and_map(np, 0);
 		of_node_put(np);
-		goto free_netdev;
+		lp->eth_irq = platform_get_irq(pdev, 0);
+	} else {
+		/* Check for these resources directly on the Ethernet node. */
+		struct resource *res = platform_get_resource(pdev,
+							     IORESOURCE_MEM, 1);
+		if (!res) {
+			dev_err(&pdev->dev,
+				"unable to get DMA memory resource\n");
+			ret = -ENODEV;
+			goto free_netdev;
+		}
+		lp->dma_regs = devm_ioremap_resource(&pdev->dev, res);
+		lp->rx_irq = platform_get_irq(pdev, 1);
+		lp->tx_irq = platform_get_irq(pdev, 0);
+		lp->eth_irq = platform_get_irq(pdev, 2);
 	}
-	lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares);
 	if (IS_ERR(lp->dma_regs)) {
 		dev_err(&pdev->dev, "could not map DMA regs\n");
 		ret = PTR_ERR(lp->dma_regs);
-		of_node_put(np);
 		goto free_netdev;
 	}
-	lp->rx_irq = irq_of_parse_and_map(np, 1);
-	lp->tx_irq = irq_of_parse_and_map(np, 0);
-	of_node_put(np);
 	if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) {
 		dev_err(&pdev->dev, "could not determine irqs\n");
 		ret = -ENOMEM;
 		goto free_netdev;
 	}
 
+	/* Check for Ethernet core IRQ (optional) */
+	if (lp->eth_irq <= 0)
+		dev_info(&pdev->dev, "Ethernet core IRQ not defined\n");
+
 	/* Retrieve the MAC address */
 	mac_addr = of_get_mac_address(pdev->dev.of_node);
 	if (IS_ERR(mac_addr)) {
-		dev_err(&pdev->dev, "could not find MAC address\n");
-		goto free_netdev;
+		dev_warn(&pdev->dev, "could not find MAC address property: %ld\n",
+			 PTR_ERR(mac_addr));
+		mac_addr = NULL;
 	}
 	axienet_set_mac_address(ndev, mac_addr);
 
@@ -1608,9 +1828,36 @@ static int axienet_probe(struct platform_device *pdev)
 
 	lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
 	if (lp->phy_node) {
-		ret = axienet_mdio_setup(lp, pdev->dev.of_node);
+		lp->clk = devm_clk_get(&pdev->dev, NULL);
+		if (IS_ERR(lp->clk)) {
+			dev_warn(&pdev->dev, "Failed to get clock: %ld\n",
+				 PTR_ERR(lp->clk));
+			lp->clk = NULL;
+		} else {
+			ret = clk_prepare_enable(lp->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "Unable to enable clock: %d\n",
					ret);
+				goto free_netdev;
+			}
+		}
+
+		ret = axienet_mdio_setup(lp);
 		if (ret)
-			dev_warn(&pdev->dev, "error registering MDIO bus\n");
+			dev_warn(&pdev->dev,
+				 "error registering MDIO bus: %d\n", ret);
+	}
+
+	lp->phylink_config.dev = &ndev->dev;
+	lp->phylink_config.type = PHYLINK_NETDEV;
+
+	lp->phylink = phylink_create(&lp->phylink_config, pdev->dev.fwnode,
+				     lp->phy_mode,
+				     &axienet_phylink_ops);
+	if (IS_ERR(lp->phylink)) {
+		ret = PTR_ERR(lp->phylink);
+		dev_err(&pdev->dev, "phylink_create error (%i)\n", ret);
+		goto free_netdev;
 	}
 
 	ret = register_netdev(lp->ndev);
@@ -1632,9 +1879,16 @@ static int axienet_remove(struct platform_device *pdev)
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	axienet_mdio_teardown(lp);
 	unregister_netdev(ndev);
 
+	if (lp->phylink)
+		phylink_destroy(lp->phylink);
+
+	axienet_mdio_teardown(lp);
+
+	if (lp->clk)
+		clk_disable_unprepare(lp->clk);
+
 	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
 
@@ -1643,9 +1897,23 @@ static int axienet_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void axienet_shutdown(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+
+	rtnl_lock();
+	netif_device_detach(ndev);
+
+	if (netif_running(ndev))
+		dev_close(ndev);
+
+	rtnl_unlock();
+}
+
 static struct platform_driver axienet_driver = {
 	.probe = axienet_probe,
 	.remove = axienet_remove,
+	.shutdown = axienet_shutdown,
 	.driver = {
 		 .name = "xilinx_axienet",
 		 .of_match_table = axienet_of_match,
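The new shutdown hook matters for reboot and kexec: a still-running AXI DMA would keep writing into memory the next kernel owns. dev_close() funnels into the driver's ndo_stop (axienet_stop above), which halts and resets the DMA; the RTNL lock is required around dev_close(). The same quiesce pattern, annotated (foo_shutdown() is a placeholder name):

	static void foo_shutdown(struct platform_device *pdev)
	{
		struct net_device *ndev = platform_get_drvdata(pdev);

		rtnl_lock();			/* dev_close() asserts RTNL */
		netif_device_detach(ndev);	/* fend off further userspace ops */
		if (netif_running(ndev))
			dev_close(ndev);	/* ndo_stop: halt DMA, free IRQs */
		rtnl_unlock();
	}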
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
index 704babdbc8a2..435ed308d990 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
@@ -5,9 +5,11 @@
  * Copyright (c) 2009 Secret Lab Technologies, Ltd.
  * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu>
  * Copyright (c) 2010 - 2011 PetaLogix
+ * Copyright (c) 2019 SED Systems, a division of Calian Ltd.
  * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved.
  */
 
+#include <linux/clk.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/jiffies.h>
@@ -16,10 +18,10 @@
 #include "xilinx_axienet.h"
 
 #define MAX_MDIO_FREQ		2500000 /* 2.5 MHz */
-#define DEFAULT_CLOCK_DIVISOR	XAE_MDIO_DIV_DFT
+#define DEFAULT_HOST_CLOCK	150000000 /* 150 MHz */
 
 /* Wait till MDIO interface is ready to accept a new transaction.*/
-int axienet_mdio_wait_until_ready(struct axienet_local *lp)
+static int axienet_mdio_wait_until_ready(struct axienet_local *lp)
 {
 	u32 val;
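The ready-wait that becomes static here is a plain bounded spin on the MDIO control register. On kernels with the iopoll helpers it could equally be written with readx_poll_timeout(); a sketch (axienet_ior_mcr() is an assumed one-argument wrapper, since the helper's op takes a single address argument):

	#include <linux/iopoll.h>

	static u32 axienet_ior_mcr(struct axienet_local *lp)
	{
		return axienet_ior(lp, XAE_MDIO_MCR_OFFSET);
	}

	static int axienet_mdio_wait_until_ready(struct axienet_local *lp)
	{
		u32 val;

		/* poll every 1 us, give up after 20 ms */
		return readx_poll_timeout(axienet_ior_mcr, lp, val,
					  val & XAE_MDIO_MCR_READY_MASK,
					  1, 20000);
	}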
@@ -112,23 +114,42 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg,
 }
 
 /**
- * axienet_mdio_setup - MDIO setup function
+ * axienet_mdio_enable - MDIO hardware setup function
  * @lp:		Pointer to axienet local data structure.
- * @np:		Pointer to device node
  *
- * Return:	0 on success, -ETIMEDOUT on a timeout, -ENOMEM when
- *		mdiobus_alloc (to allocate memory for mii bus structure) fails.
+ * Return:	0 on success, -ETIMEDOUT on a timeout.
  *
  * Sets up the MDIO interface by initializing the MDIO clock and enabling the
- * MDIO interface in hardware. Register the MDIO interface.
+ * MDIO interface in hardware.
  **/
-int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
+int axienet_mdio_enable(struct axienet_local *lp)
 {
-	int ret;
 	u32 clk_div, host_clock;
-	struct mii_bus *bus;
-	struct resource res;
-	struct device_node *np1;
+
+	if (lp->clk) {
+		host_clock = clk_get_rate(lp->clk);
+	} else {
+		struct device_node *np1;
+
+		/* Legacy fallback: detect CPU clock frequency and use as AXI
+		 * bus clock frequency. This only works on certain platforms.
+		 */
+		np1 = of_find_node_by_name(NULL, "cpu");
+		if (!np1) {
+			netdev_warn(lp->ndev, "Could not find CPU device node.\n");
+			host_clock = DEFAULT_HOST_CLOCK;
+		} else {
+			int ret = of_property_read_u32(np1, "clock-frequency",
+						       &host_clock);
+			if (ret) {
+				netdev_warn(lp->ndev, "CPU clock-frequency property not found.\n");
+				host_clock = DEFAULT_HOST_CLOCK;
+			}
+			of_node_put(np1);
+		}
+		netdev_info(lp->ndev, "Setting assumed host clock to %u\n",
+			    host_clock);
+	}
 
 	/* clk_div can be calculated by deriving it from the equation:
 	 * fMDIO = fHOST / ((1 + clk_div) * 2)
@@ -155,25 +176,6 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
 	 * "clock-frequency" from the CPU
 	 */
 
-	np1 = of_find_node_by_name(NULL, "cpu");
-	if (!np1) {
-		netdev_warn(lp->ndev, "Could not find CPU device node.\n");
-		netdev_warn(lp->ndev,
-			    "Setting MDIO clock divisor to default %d\n",
-			    DEFAULT_CLOCK_DIVISOR);
-		clk_div = DEFAULT_CLOCK_DIVISOR;
-		goto issue;
-	}
-	if (of_property_read_u32(np1, "clock-frequency", &host_clock)) {
-		netdev_warn(lp->ndev, "clock-frequency property not found.\n");
-		netdev_warn(lp->ndev,
-			    "Setting MDIO clock divisor to default %d\n",
-			    DEFAULT_CLOCK_DIVISOR);
-		clk_div = DEFAULT_CLOCK_DIVISOR;
-		of_node_put(np1);
-		goto issue;
-	}
-
 	clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1;
 	/* If there is any remainder from the division of
 	 * fHOST / (MAX_MDIO_FREQ * 2), then we need to add
@@ -186,12 +188,39 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
 		    "Setting MDIO clock divisor to %u/%u Hz host clock.\n",
 		    clk_div, host_clock);
 
-	of_node_put(np1);
-issue:
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET,
-		    (((u32) clk_div) | XAE_MDIO_MC_MDIOEN_MASK));
+	axienet_iow(lp, XAE_MDIO_MC_OFFSET, clk_div | XAE_MDIO_MC_MDIOEN_MASK);
 
-	ret = axienet_mdio_wait_until_ready(lp);
+	return axienet_mdio_wait_until_ready(lp);
+}
+
+/**
+ * axienet_mdio_disable - MDIO hardware disable function
+ * @lp:		Pointer to axienet local data structure.
+ *
+ * Disable the MDIO interface in hardware.
+ **/
+void axienet_mdio_disable(struct axienet_local *lp)
+{
+	axienet_iow(lp, XAE_MDIO_MC_OFFSET, 0);
+}
+
+/**
+ * axienet_mdio_setup - MDIO setup function
+ * @lp:		Pointer to axienet local data structure.
+ *
+ * Return:	0 on success, -ETIMEDOUT on a timeout, -ENOMEM when
+ *		mdiobus_alloc (to allocate memory for mii bus structure) fails.
+ *
+ * Sets up the MDIO interface by initializing the MDIO clock and enabling the
+ * MDIO interface in hardware. Register the MDIO interface.
+ **/
+int axienet_mdio_setup(struct axienet_local *lp)
+{
+	struct device_node *mdio_node;
+	struct mii_bus *bus;
+	int ret;
+
+	ret = axienet_mdio_enable(lp);
 	if (ret < 0)
 		return ret;
 
@@ -199,10 +228,8 @@ issue:
 	if (!bus)
 		return -ENOMEM;
 
-	np1 = of_get_parent(lp->phy_node);
-	of_address_to_resource(np1, 0, &res);
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%.8llx",
-		 (unsigned long long) res.start);
+	snprintf(bus->id, MII_BUS_ID_SIZE, "axienet-%.8llx",
+		 (unsigned long long)lp->regs_start);
 
 	bus->priv = lp;
 	bus->name = "Xilinx Axi Ethernet MDIO";
@@ -211,7 +238,9 @@ issue:
 	bus->parent = lp->dev;
 	lp->mii_bus = bus;
 
-	ret = of_mdiobus_register(bus, np1);
+	mdio_node = of_get_child_by_name(lp->dev->of_node, "mdio");
+	ret = of_mdiobus_register(bus, mdio_node);
+	of_node_put(mdio_node);
 	if (ret) {
 		mdiobus_free(bus);
 		lp->mii_bus = NULL;
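Worked example of the divisor formula quoted in the comment above, fMDIO = fHOST / ((1 + clk_div) * 2), for the 150 MHz DEFAULT_HOST_CLOCK:

	u32 host_clock = 150000000;	/* DEFAULT_HOST_CLOCK */
	u32 clk_div;

	clk_div = host_clock / (MAX_MDIO_FREQ * 2) - 1;	/* 150e6 / 5e6 - 1 = 29 */
	if (host_clock % (MAX_MDIO_FREQ * 2))
		clk_div++;	/* round fMDIO down so it never exceeds 2.5 MHz */

	/* (1 + 29) * 2 = 60, and 150 MHz / 60 = exactly 2.5 MHz.
	 * For a 133 MHz host clock: 133e6 / 5e6 - 1 = 25, remainder non-zero,
	 * so clk_div becomes 26, giving 133 MHz / 54 = about 2.46 MHz.
	 */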